From 9072ca5f4e0342b426cbc96c9d2783d98c5815a2 Mon Sep 17 00:00:00 2001 From: Jonathan Buch Date: Thu, 29 Aug 2024 08:52:45 +0200 Subject: [PATCH 1/8] grafana, add basic connector --- pkg/connectors/grafana/api.go | 47 ++++++ pkg/connectors/grafana/connector.go | 144 +++++++++++++++++ pkg/connectors/grafana/connector_test.go | 194 +++++++++++++++++++++++ 3 files changed, 385 insertions(+) create mode 100644 pkg/connectors/grafana/api.go create mode 100644 pkg/connectors/grafana/connector.go create mode 100644 pkg/connectors/grafana/connector_test.go diff --git a/pkg/connectors/grafana/api.go b/pkg/connectors/grafana/api.go new file mode 100644 index 0000000..9bc3b40 --- /dev/null +++ b/pkg/connectors/grafana/api.go @@ -0,0 +1,47 @@ +package grafana + +type alertingRulesResult struct { + Status string `json:"status"` + Data alertingRulesData `json:"data"` +} + +type alertingRulesData struct { + Groups []alertingRulesGroup `json:"groups"` +} + +type alertingRulesGroup struct { + Name string `json:"name"` + File string `json:"file"` + Rules []alertingRule `json:"rules"` +} + +type alertingRule struct { + State alertingState `json:"state"` + Name string `json:"name"` + ActiveAt string `json:"activeAt"` + Health string `json:"health"` + Annotations map[string]string `json:"annotations"` + Labels map[string]string `json:"labels"` + Alerts []alert `json:"alerts"` + Type string `json:"type"` +} + +type alert struct { + Labels map[string]string `json:"labels"` + Annotations map[string]string `json:"annotations"` + State string `json:"state"` + ActiveAt string `json:"activeAt"` + Value int `json:"value"` +} + +type alertingState = string + +const ( + alertingStateFiring = "firing" + alertingStateAlerting = "alerting" + alertingStateInactive = "inactive" + alertingStateNoData = "nodata" + alertingStateNormal = "Normal" + alertingStatePending = "pending" + alertingStateError = "error" +) diff --git a/pkg/connectors/grafana/connector.go b/pkg/connectors/grafana/connector.go new file mode 100644 index 0000000..d489be1 --- /dev/null +++ b/pkg/connectors/grafana/connector.go @@ -0,0 +1,144 @@ +package grafana + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + html "html/template" + "io" + "log/slog" + "net/http" + "strings" + "time" + + "github.com/synyx/tuwat/pkg/connectors" + "github.com/synyx/tuwat/pkg/connectors/common" +) + +type Connector struct { + config Config + client *http.Client +} + +type Config struct { + Tag string + Cluster string + common.HTTPConfig +} + +func NewConnector(cfg *Config) *Connector { + c := &Connector{config: *cfg, client: cfg.HTTPConfig.Client()} + + return c +} + +func (c *Connector) Tag() string { + return c.config.Tag +} + +func (c *Connector) Collect(ctx context.Context) ([]connectors.Alert, error) { + sourceAlertGroups, err := c.collectAlerts(ctx) + if err != nil { + return nil, err + } + + var alerts []connectors.Alert + + for _, sourceAlertGroup := range sourceAlertGroups { + rule := sourceAlertGroup.Rules[0] + sourceAlert := rule.Alerts[0] + + state := grafanaStateToState(sourceAlert.State) + if state == connectors.OK { + continue + } + + labels := map[string]string{ + "Folder": sourceAlert.Labels["grafana_folder"], + "Alertname": sourceAlert.Labels["alertname"], + "Contacts": sourceAlert.Labels["__contacts__"], + } + + alert := connectors.Alert{ + Labels: labels, + Start: parseTime(sourceAlert.ActiveAt), + State: state, + Description: rule.Name, + Details: rule.Annotations["message"], + Links: []html.HTML{ + html.HTML("🏠"), + html.HTML("🏠"), + }, + } + + alerts = append(alerts, alert) + } + + return alerts, nil +} + +func grafanaStateToState(state string) connectors.State { + switch strings.ToLower(state) { + case alertingStateAlerting: + return connectors.Critical + default: + return connectors.OK + } +} + +func (c *Connector) String() string { + return fmt.Sprintf("Grafana (%s)", c.config.URL) +} + +func (c *Connector) collectAlerts(ctx context.Context) ([]alertingRulesGroup, error) { + res, err := c.get(ctx, "/api/prometheus/grafana/api/v1/rules") + if err != nil { + return nil, err + } + defer res.Body.Close() + + b, _ := io.ReadAll(res.Body) + buf := bytes.NewBuffer(b) + + decoder := json.NewDecoder(buf) + + var response alertingRulesResult + err = decoder.Decode(&response) + if err != nil { + slog.ErrorContext(ctx, "Cannot parse", + slog.String("url", c.config.URL), + slog.String("data", buf.String()), + slog.Any("status", res.StatusCode), + slog.Any("error", err)) + return nil, err + } + + return response.Data.Groups, nil +} + +func (c *Connector) get(ctx context.Context, endpoint string) (*http.Response, error) { + + slog.DebugContext(ctx, "getting alerts", slog.String("url", c.config.URL+endpoint)) + + req, err := http.NewRequestWithContext(ctx, http.MethodGet, c.config.URL+endpoint, nil) + if err != nil { + return nil, err + } + + req.Header.Set("Accept", "application/json") + + res, err := c.client.Do(req) + if err != nil { + return nil, err + } + + return res, nil +} +func parseTime(timeField string) time.Time { + t, err := time.Parse("2006-01-02T15:04:05.999-07:00", timeField) + if err != nil { + return time.Time{} + } + return t +} diff --git a/pkg/connectors/grafana/connector_test.go b/pkg/connectors/grafana/connector_test.go new file mode 100644 index 0000000..83486b0 --- /dev/null +++ b/pkg/connectors/grafana/connector_test.go @@ -0,0 +1,194 @@ +package grafana + +import ( + "context" + "encoding/json" + "net/http" + "net/http/httptest" + "regexp" + "strings" + "testing" + + "github.com/synyx/tuwat/pkg/connectors" + "github.com/synyx/tuwat/pkg/connectors/common" +) + +func mockConnector() (connectors.Connector, func()) { + testServer := httptest.NewServer(http.HandlerFunc(func(res http.ResponseWriter, req *http.Request) { + res.WriteHeader(http.StatusOK) + _, _ = res.Write([]byte(mockResponse)) + })) + + cfg := Config{ + Tag: "test", + HTTPConfig: common.HTTPConfig{ + URL: testServer.URL, + }, + } + + return NewConnector(&cfg), func() { testServer.Close() } +} + +func TestConnector(t *testing.T) { + connector, closer := mockConnector() + defer closer() + alerts, err := connector.Collect(context.Background()) + if err != nil { + t.Fatal(err) + } + + if alerts == nil || len(alerts) != 3 { + t.Error("There should be alerts") + } +} + +func TestDecode(t *testing.T) { + var foo []alert + err := json.Unmarshal([]byte(mockResponse), &foo) + if err != nil { + t.Fatal(err) + } +} + +func TestEncodingOfLinks(t *testing.T) { + connector, closer := mockConnector() + defer closer() + alerts, _ := connector.Collect(context.Background()) + + alert := alerts[0] + for _, link := range alert.Links { + if !strings.Contains(string(link), "://") { + t.Error("There should be a non encoded url") + } + } +} + +const mockResponse = ` +[ + { + "annotations": { + "description": "There were ` + "`" + `1` + "`" + ` messages in queue ` + "`" + `meister` + "`" + ` vhost ` + "`" + `/` + "`" + `\nfor the last 15 minutes in app cluster ` + "`" + `` + "`" + ` in namespace ` + "`" + `app-stage` + "`" + `.\n", + "runbook": "https://gitlab.example.com/infra/k8s/app/-/tree/master/documentation/runbook.md#app-consumerless-queues", + "summary": "app cluster has queues with messages but no consumers." + }, + "endsAt": "2022-09-25T16:55:08.801Z", + "fingerprint": "006eeab040ebbfb6", + "receivers": [ + { + "name": "ops-mails" + } + ], + "startsAt": "2022-04-11T08:01:38.801Z", + "status": { + "inhibitedBy": [], + "silencedBy": [], + "state": "active" + }, + "updatedAt": "2022-09-25T16:51:09.004Z", + "generatorURL": "https://prometheus.example.com/graph?g0.expr=app_queue_messages%7Bqueue%21~%22.%2A%28dead%7Cdlq%7Cdebug%7Cmobile.sync%7Ctest%7Cmarvin%7Cresend%7Cmobile.events%29.%2A%22%7D+%3E+0+unless+app_queue_consumer_capacity+%3E+0&g0.tab=1", + "labels": { + "alertname": "appQueueMessagesNoConsumer", + "cluster": "prometheus-apps", + "container": "app", + "endpoint": "prometheus", + "instance": "100.111.11.198:15692", + "job": "app-stage/app", + "namespace": "app-stage", + "pod": "app-server-0", + "prometheus": "apps-monitoring/apps", + "queue": "meister", + "severity": "warning", + "vhost": "/" + } + }, + { + "annotations": { + "description": "constraint violation of kind ContainerLimits in Pod gitlab-agent-7f967f9945-9k8p4 in namespace api-gateway-prod", + "summary": "container has no resource limits" + }, + "endsAt": "2022-09-25T16:54:42.994Z", + "fingerprint": "00ae4411e137c417", + "receivers": [ + { + "name": "ops-mails" + } + ], + "startsAt": "2022-08-30T09:40:12.994Z", + "status": { + "inhibitedBy": [], + "silencedBy": [], + "state": "active" + }, + "updatedAt": "2022-09-25T16:50:43.008Z", + "generatorURL": "https://prometheus.example.com/graph?g0.expr=opa_scorecard_constraint_violations+%3E+0&g0.tab=1", + "labels": { + "alertname": "GatekeeperConstraintViolations", + "cluster": "prometheus-apps", + "context": "gatekeeper", + "endpoint": "9141-9141", + "instance": "100.111.12.32:9141", + "job": "opa-exporter", + "kind": "ContainerLimits", + "name": "pod-container-limits", + "namespace": "monitoring", + "pod": "opa-exporter-6fc88b44f4-6xgnn", + "prometheus": "apps-monitoring/apps", + "service": "opa-exporter", + "severity": "warning", + "violating_kind": "Pod", + "violating_name": "gitlab-agent-7f967f9945-9k8p4", + "violating_namespace": "api-gateway-prod", + "violation_enforcement": "warn", + "violation_msg": "container has no resource limits" + } + }, + { + "annotations": { + "description": "There were ` + "`" + `1` + "`" + ` dead lettered messages in queue ` + "`" + `selfcheckin.mot.checkin.command.deadletter` + "`" + ` vhost ` + "`" + `/` + "`" + `\nfor the last 15 minutes in app cluster ` + "`" + `` + "`" + ` in namespace ` + "`" + `app-stage` + "`" + `.\n", + "runbook": "https://gitlab.example.com/infra/k8s/app/-/tree/master/documentation/runbook.md#app-deadletter-queue", + "summary": "app cluster has dead letter messages" + }, + "endsAt": "2022-09-25T16:55:08.801Z", + "fingerprint": "01b99423f38362e5", + "receivers": [ + { + "name": "ops-mails" + } + ], + "startsAt": "2022-06-01T14:10:08.801Z", + "status": { + "inhibitedBy": [], + "silencedBy": [], + "state": "active" + }, + "updatedAt": "2022-09-25T16:51:08.900Z", + "generatorURL": "https://prometheus.example.com/graph?g0.expr=app_queue_messages%7Bqueue%3D~%22.%2A%28dead%7Cdlq%29.%2A%22%7D+%3E+0&g0.tab=1", + "labels": { + "alertname": "appDeadletterQueueMessages", + "cluster": "prometheus-apps", + "container": "app", + "endpoint": "prometheus", + "instance": "100.111.11.198:15692", + "job": "app-stage/app", + "namespace": "app-stage", + "pod": "app-server-0", + "prometheus": "apps-monitoring/apps", + "queue": "selfcheckin.mot.checkin.command.deadletter", + "severity": "warning", + "vhost": "/" + } + } +] +` + +func TestConnector_Collect(t *testing.T) { + r := regexp.MustCompile(`in namespace\W+([a-zA-Z-0-9_-]+)`) + details := "constraint violation of kind ContainerLimits in Pod gitlab-agent-landingpage-659cf9567d-kkxsl in namespace api-gateway-stage\n\t\t" + where := "" + if s := r.FindAllStringSubmatch(details, 1); len(s) > 0 { + where = s[0][1] + } + if where != "api-gateway-stage" { + t.Fail() + } +} From 1cac2e0b041b7ff50271505f502d5cab14a6785d Mon Sep 17 00:00:00 2001 From: Jonathan Buch Date: Fri, 30 Aug 2024 15:22:37 +0200 Subject: [PATCH 2/8] grafana, integrate connector and make configurable --- pkg/config/config.go | 5 + pkg/connectors/grafana/connector_test.go | 219 ++++++++--------------- 2 files changed, 79 insertions(+), 145 deletions(-) diff --git a/pkg/config/config.go b/pkg/config/config.go index 1405bcb..bc21bff 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -19,6 +19,7 @@ import ( "github.com/synyx/tuwat/pkg/connectors/example" "github.com/synyx/tuwat/pkg/connectors/github" "github.com/synyx/tuwat/pkg/connectors/gitlabmr" + "github.com/synyx/tuwat/pkg/connectors/grafana" "github.com/synyx/tuwat/pkg/connectors/graylog" "github.com/synyx/tuwat/pkg/connectors/icinga2" "github.com/synyx/tuwat/pkg/connectors/nagiosapi" @@ -91,6 +92,7 @@ type rootConfig struct { Orderview []orderview.Config `toml:"orderview"` Example []example.Config `toml:"example"` Graylogs []graylog.Config `toml:"graylog"` + Grafanas []grafana.Config `toml:"grafana"` } func NewConfiguration() (config *Config, err error) { @@ -244,6 +246,9 @@ func (cfg *Config) configureMain(rootConfig *rootConfig) (err error) { for _, connectorConfig := range rootConfig.Graylogs { cfg.Connectors = append(cfg.Connectors, graylog.NewConnector(&connectorConfig)) } + for _, connectorConfig := range rootConfig.Grafanas { + cfg.Connectors = append(cfg.Connectors, grafana.NewConnector(&connectorConfig)) + } // Add template for cfg.WhereTemplate, err = template.New("where"). diff --git a/pkg/connectors/grafana/connector_test.go b/pkg/connectors/grafana/connector_test.go index 83486b0..89e4569 100644 --- a/pkg/connectors/grafana/connector_test.go +++ b/pkg/connectors/grafana/connector_test.go @@ -2,33 +2,15 @@ package grafana import ( "context" - "encoding/json" "net/http" "net/http/httptest" "regexp" - "strings" "testing" "github.com/synyx/tuwat/pkg/connectors" "github.com/synyx/tuwat/pkg/connectors/common" ) -func mockConnector() (connectors.Connector, func()) { - testServer := httptest.NewServer(http.HandlerFunc(func(res http.ResponseWriter, req *http.Request) { - res.WriteHeader(http.StatusOK) - _, _ = res.Write([]byte(mockResponse)) - })) - - cfg := Config{ - Tag: "test", - HTTPConfig: common.HTTPConfig{ - URL: testServer.URL, - }, - } - - return NewConnector(&cfg), func() { testServer.Close() } -} - func TestConnector(t *testing.T) { connector, closer := mockConnector() defer closer() @@ -42,143 +24,90 @@ func TestConnector(t *testing.T) { } } -func TestDecode(t *testing.T) { - var foo []alert - err := json.Unmarshal([]byte(mockResponse), &foo) - if err != nil { - t.Fatal(err) - } -} - -func TestEncodingOfLinks(t *testing.T) { - connector, closer := mockConnector() - defer closer() - alerts, _ := connector.Collect(context.Background()) +func mockConnector() (connectors.Connector, func()) { + testServer := httptest.NewServer(http.HandlerFunc(func(res http.ResponseWriter, req *http.Request) { + res.WriteHeader(http.StatusOK) + _, _ = res.Write([]byte(mockResponse)) + })) - alert := alerts[0] - for _, link := range alert.Links { - if !strings.Contains(string(link), "://") { - t.Error("There should be a non encoded url") - } + cfg := Config{ + Tag: "test", + HTTPConfig: common.HTTPConfig{ + URL: testServer.URL, + }, } + + return NewConnector(&cfg), func() { testServer.Close() } } const mockResponse = ` -[ - { - "annotations": { - "description": "There were ` + "`" + `1` + "`" + ` messages in queue ` + "`" + `meister` + "`" + ` vhost ` + "`" + `/` + "`" + `\nfor the last 15 minutes in app cluster ` + "`" + `` + "`" + ` in namespace ` + "`" + `app-stage` + "`" + `.\n", - "runbook": "https://gitlab.example.com/infra/k8s/app/-/tree/master/documentation/runbook.md#app-consumerless-queues", - "summary": "app cluster has queues with messages but no consumers." - }, - "endsAt": "2022-09-25T16:55:08.801Z", - "fingerprint": "006eeab040ebbfb6", - "receivers": [ - { - "name": "ops-mails" - } - ], - "startsAt": "2022-04-11T08:01:38.801Z", - "status": { - "inhibitedBy": [], - "silencedBy": [], - "state": "active" - }, - "updatedAt": "2022-09-25T16:51:09.004Z", - "generatorURL": "https://prometheus.example.com/graph?g0.expr=app_queue_messages%7Bqueue%21~%22.%2A%28dead%7Cdlq%7Cdebug%7Cmobile.sync%7Ctest%7Cmarvin%7Cresend%7Cmobile.events%29.%2A%22%7D+%3E+0+unless+app_queue_consumer_capacity+%3E+0&g0.tab=1", - "labels": { - "alertname": "appQueueMessagesNoConsumer", - "cluster": "prometheus-apps", - "container": "app", - "endpoint": "prometheus", - "instance": "100.111.11.198:15692", - "job": "app-stage/app", - "namespace": "app-stage", - "pod": "app-server-0", - "prometheus": "apps-monitoring/apps", - "queue": "meister", - "severity": "warning", - "vhost": "/" - } - }, - { - "annotations": { - "description": "constraint violation of kind ContainerLimits in Pod gitlab-agent-7f967f9945-9k8p4 in namespace api-gateway-prod", - "summary": "container has no resource limits" - }, - "endsAt": "2022-09-25T16:54:42.994Z", - "fingerprint": "00ae4411e137c417", - "receivers": [ +{ + "status": "success", + "data": { + "groups": [ { - "name": "ops-mails" + "name": "failed authentications alert", + "file": "Folder", + "rules": [ + { + "state": "inactive", + "name": "Consumed no things alert", + "query": "", + "annotations": { + "__alertId__": "81", + "__dashboardUid__": "UlpdFLWMz", + "__panelId__": "7", + "message": "Long Message" + }, + "alerts": [ + { + "labels": { + "__contacts__": "\"Trucking Team\",\"jbuch mail\"", + "alertname": "Consumed no things alert", + "grafana_folder": "Folder", + "rule_uid": "kbMKlW04z" + }, + "annotations": { + "__alertId__": "81", + "__dashboardUid__": "UlpdFLWMz", + "__panelId__": "7", + "message": "Long Message" + }, + "state": "Normal", + "activeAt": "2024-08-13T12:41:40+02:00", + "value": "" + } + ], + "totals": { + "normal": 1 + }, + "totalsFiltered": { + "normal": 1 + }, + "labels": { + "__contacts__": "\"Trucking Team\",\"jbuch mail\"", + "rule_uid": "kbMKlW04z" + }, + "health": "ok", + "type": "alerting", + "lastEvaluation": "2024-08-30T15:18:40+02:00", + "evaluationTime": 6.723146319 + } + ], + "totals": { + "inactive": 1 + }, + "interval": 60, + "lastEvaluation": "2024-08-30T15:18:40+02:00", + "evaluationTime": 6.723146319 } ], - "startsAt": "2022-08-30T09:40:12.994Z", - "status": { - "inhibitedBy": [], - "silencedBy": [], - "state": "active" - }, - "updatedAt": "2022-09-25T16:50:43.008Z", - "generatorURL": "https://prometheus.example.com/graph?g0.expr=opa_scorecard_constraint_violations+%3E+0&g0.tab=1", - "labels": { - "alertname": "GatekeeperConstraintViolations", - "cluster": "prometheus-apps", - "context": "gatekeeper", - "endpoint": "9141-9141", - "instance": "100.111.12.32:9141", - "job": "opa-exporter", - "kind": "ContainerLimits", - "name": "pod-container-limits", - "namespace": "monitoring", - "pod": "opa-exporter-6fc88b44f4-6xgnn", - "prometheus": "apps-monitoring/apps", - "service": "opa-exporter", - "severity": "warning", - "violating_kind": "Pod", - "violating_name": "gitlab-agent-7f967f9945-9k8p4", - "violating_namespace": "api-gateway-prod", - "violation_enforcement": "warn", - "violation_msg": "container has no resource limits" - } - }, - { - "annotations": { - "description": "There were ` + "`" + `1` + "`" + ` dead lettered messages in queue ` + "`" + `selfcheckin.mot.checkin.command.deadletter` + "`" + ` vhost ` + "`" + `/` + "`" + `\nfor the last 15 minutes in app cluster ` + "`" + `` + "`" + ` in namespace ` + "`" + `app-stage` + "`" + `.\n", - "runbook": "https://gitlab.example.com/infra/k8s/app/-/tree/master/documentation/runbook.md#app-deadletter-queue", - "summary": "app cluster has dead letter messages" - }, - "endsAt": "2022-09-25T16:55:08.801Z", - "fingerprint": "01b99423f38362e5", - "receivers": [ - { - "name": "ops-mails" - } - ], - "startsAt": "2022-06-01T14:10:08.801Z", - "status": { - "inhibitedBy": [], - "silencedBy": [], - "state": "active" - }, - "updatedAt": "2022-09-25T16:51:08.900Z", - "generatorURL": "https://prometheus.example.com/graph?g0.expr=app_queue_messages%7Bqueue%3D~%22.%2A%28dead%7Cdlq%29.%2A%22%7D+%3E+0&g0.tab=1", - "labels": { - "alertname": "appDeadletterQueueMessages", - "cluster": "prometheus-apps", - "container": "app", - "endpoint": "prometheus", - "instance": "100.111.11.198:15692", - "job": "app-stage/app", - "namespace": "app-stage", - "pod": "app-server-0", - "prometheus": "apps-monitoring/apps", - "queue": "selfcheckin.mot.checkin.command.deadletter", - "severity": "warning", - "vhost": "/" + "totals": { + "inactive": 10, + "nodata": 5 } } -] +} ` func TestConnector_Collect(t *testing.T) { From 2d246baf69bced6c8d17607340dd3701e0245c58 Mon Sep 17 00:00:00 2001 From: Jonathan Buch Date: Sun, 1 Sep 2024 12:35:48 +0200 Subject: [PATCH 3/8] grafana, revise api usage * Document using an internal API * See https://github.com/grafana/grafana/discussions/60657 for more confusion --- pkg/connectors/grafana/api.go | 29 ++++++++------ pkg/connectors/grafana/connector.go | 7 +++- pkg/connectors/grafana/connector_test.go | 50 +++++++++++++++++------- 3 files changed, 58 insertions(+), 28 deletions(-) diff --git a/pkg/connectors/grafana/api.go b/pkg/connectors/grafana/api.go index 9bc3b40..bc253d2 100644 --- a/pkg/connectors/grafana/api.go +++ b/pkg/connectors/grafana/api.go @@ -1,15 +1,17 @@ package grafana -type alertingRulesResult struct { - Status string `json:"status"` - Data alertingRulesData `json:"data"` +// https://raw.githubusercontent.com/grafana/grafana/main/pkg/services/ngalert/api/tooling/post.json + +type ruleResponse struct { + Status string `json:"status"` + Data ruleDiscovery `json:"data,omitempty"` } -type alertingRulesData struct { - Groups []alertingRulesGroup `json:"groups"` +type ruleDiscovery struct { + Groups []ruleGroup `json:"groups"` } -type alertingRulesGroup struct { +type ruleGroup struct { Name string `json:"name"` File string `json:"file"` Rules []alertingRule `json:"rules"` @@ -21,8 +23,8 @@ type alertingRule struct { ActiveAt string `json:"activeAt"` Health string `json:"health"` Annotations map[string]string `json:"annotations"` - Labels map[string]string `json:"labels"` - Alerts []alert `json:"alerts"` + Labels map[string]string `json:"labels,omitempty"` + Alerts []alert `json:"alerts,omitempty"` Type string `json:"type"` } @@ -31,17 +33,20 @@ type alert struct { Annotations map[string]string `json:"annotations"` State string `json:"state"` ActiveAt string `json:"activeAt"` - Value int `json:"value"` + Value string `json:"value"` } type alertingState = string const ( + alertingStatePending = "pending" alertingStateFiring = "firing" - alertingStateAlerting = "alerting" alertingStateInactive = "inactive" +) + +const ( + alertingStateAlerting = "alerting" alertingStateNoData = "nodata" - alertingStateNormal = "Normal" - alertingStatePending = "pending" + alertingStateNormal = "normal" alertingStateError = "error" ) diff --git a/pkg/connectors/grafana/connector.go b/pkg/connectors/grafana/connector.go index d489be1..35a61d6 100644 --- a/pkg/connectors/grafana/connector.go +++ b/pkg/connectors/grafana/connector.go @@ -55,6 +55,7 @@ func (c *Connector) Collect(ctx context.Context) ([]connectors.Alert, error) { } labels := map[string]string{ + "Hostname": sourceAlert.Labels["grafana_folder"], "Folder": sourceAlert.Labels["grafana_folder"], "Alertname": sourceAlert.Labels["alertname"], "Contacts": sourceAlert.Labels["__contacts__"], @@ -82,6 +83,8 @@ func grafanaStateToState(state string) connectors.State { switch strings.ToLower(state) { case alertingStateAlerting: return connectors.Critical + case alertingStateNoData: + return connectors.Warning default: return connectors.OK } @@ -91,7 +94,7 @@ func (c *Connector) String() string { return fmt.Sprintf("Grafana (%s)", c.config.URL) } -func (c *Connector) collectAlerts(ctx context.Context) ([]alertingRulesGroup, error) { +func (c *Connector) collectAlerts(ctx context.Context) ([]ruleGroup, error) { res, err := c.get(ctx, "/api/prometheus/grafana/api/v1/rules") if err != nil { return nil, err @@ -103,7 +106,7 @@ func (c *Connector) collectAlerts(ctx context.Context) ([]alertingRulesGroup, er decoder := json.NewDecoder(buf) - var response alertingRulesResult + var response ruleResponse err = decoder.Decode(&response) if err != nil { slog.ErrorContext(ctx, "Cannot parse", diff --git a/pkg/connectors/grafana/connector_test.go b/pkg/connectors/grafana/connector_test.go index 89e4569..fecc1b1 100644 --- a/pkg/connectors/grafana/connector_test.go +++ b/pkg/connectors/grafana/connector_test.go @@ -5,39 +5,61 @@ import ( "net/http" "net/http/httptest" "regexp" + "strings" "testing" - "github.com/synyx/tuwat/pkg/connectors" "github.com/synyx/tuwat/pkg/connectors/common" ) func TestConnector(t *testing.T) { - connector, closer := mockConnector() - defer closer() + connector, mockServer := testConnector(map[string][]string{ + "/api/prometheus/grafana/api/v1/rules": {mockResponse}, + }) + defer func() { mockServer.Close() }() + alerts, err := connector.Collect(context.Background()) if err != nil { t.Fatal(err) } - if alerts == nil || len(alerts) != 3 { + if len(alerts) == 0 { t.Error("There should be alerts") } } -func mockConnector() (connectors.Connector, func()) { - testServer := httptest.NewServer(http.HandlerFunc(func(res http.ResponseWriter, req *http.Request) { +// testConnector builds a connector with a mocked backend. +// Each usage of the backend server will return the next mocked body in order. +func testConnector(endpoints map[string][]string) (*Connector, *httptest.Server) { + calls := map[string]int{} + for k := range endpoints { + calls[k] = 0 + } + + mockServer := httptest.NewServer(http.HandlerFunc(func(res http.ResponseWriter, req *http.Request) { res.WriteHeader(http.StatusOK) - _, _ = res.Write([]byte(mockResponse)) + + for endpoint, bodies := range endpoints { + if strings.HasPrefix(req.URL.Path, endpoint) { + if calls[endpoint] >= len(bodies) { + panic("missing additional mock for endpoint " + endpoint) + } + body := bodies[calls[endpoint]] + calls[endpoint]++ + if _, err := res.Write([]byte(body)); err != nil { + panic(err) + } + } + } })) cfg := Config{ Tag: "test", HTTPConfig: common.HTTPConfig{ - URL: testServer.URL, + URL: mockServer.URL, }, } - return NewConnector(&cfg), func() { testServer.Close() } + return NewConnector(&cfg), mockServer } const mockResponse = ` @@ -50,7 +72,7 @@ const mockResponse = ` "file": "Folder", "rules": [ { - "state": "inactive", + "state": "alerting", "name": "Consumed no things alert", "query": "", "annotations": { @@ -62,7 +84,7 @@ const mockResponse = ` "alerts": [ { "labels": { - "__contacts__": "\"Trucking Team\",\"jbuch mail\"", + "__contacts__": "\"Team\",\"jbuch mail\"", "alertname": "Consumed no things alert", "grafana_folder": "Folder", "rule_uid": "kbMKlW04z" @@ -73,7 +95,7 @@ const mockResponse = ` "__panelId__": "7", "message": "Long Message" }, - "state": "Normal", + "state": "Alerting", "activeAt": "2024-08-13T12:41:40+02:00", "value": "" } @@ -85,10 +107,10 @@ const mockResponse = ` "normal": 1 }, "labels": { - "__contacts__": "\"Trucking Team\",\"jbuch mail\"", + "__contacts__": "\"Team\",\"jbuch mail\"", "rule_uid": "kbMKlW04z" }, - "health": "ok", + "health": "nodata", "type": "alerting", "lastEvaluation": "2024-08-30T15:18:40+02:00", "evaluationTime": 6.723146319 From ac62137e48c8354edba89c9b4ccf50f05b3a9190 Mon Sep 17 00:00:00 2001 From: Jonathan Buch Date: Sun, 1 Sep 2024 12:55:38 +0200 Subject: [PATCH 4/8] grafana, add new alert provisioning api --- pkg/connectors/grafana/api.go | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/pkg/connectors/grafana/api.go b/pkg/connectors/grafana/api.go index bc253d2..85b63f2 100644 --- a/pkg/connectors/grafana/api.go +++ b/pkg/connectors/grafana/api.go @@ -1,5 +1,7 @@ package grafana +import "time" + // https://raw.githubusercontent.com/grafana/grafana/main/pkg/services/ngalert/api/tooling/post.json type ruleResponse struct { @@ -50,3 +52,25 @@ const ( alertingStateNormal = "normal" alertingStateError = "error" ) + +// https://grafana.com/docs/grafana/latest/developers/http_api/alerting_provisioning/#provisioned-alert-rules + +type provisionedAlertRule struct { + Annotations map[string]string `json:"annotations"` + Condition string `json:"condition"` + ExecErrState string `json:"execErrState"` + Uid int64 `json:"id"` + IsPaused bool `json:"isPaused"` + Labels map[string]string `json:"labels"` + NoDataState string `json:"noDataState"` + For time.Duration `json:"for"` + Title string `json:"title"` + RuleGroup string `json:"ruleGroup"` +} + +const ( + noDataStateNoData = "NoData" + noDataStateOk = "OK" + execErrStateAlerting = "Alerting" + execErrStateError = "Error" +) From a2214be2e1fb66d17d5a0ebddbec1b4eb6fb2e31 Mon Sep 17 00:00:00 2001 From: Jonathan Buch Date: Sun, 1 Sep 2024 14:00:04 +0200 Subject: [PATCH 5/8] grafana, use alertmanager v2 alerts api * Note that grafana api is "lying". The published alerts are not firing and are still published as alerting. * This behaviour mirrors the intent of the provisioning api, which also hides the current alert state. --- pkg/connectors/grafana/api.go | 76 ------------------ pkg/connectors/grafana/connector.go | 118 ++++++---------------------- 2 files changed, 22 insertions(+), 172 deletions(-) delete mode 100644 pkg/connectors/grafana/api.go diff --git a/pkg/connectors/grafana/api.go b/pkg/connectors/grafana/api.go deleted file mode 100644 index 85b63f2..0000000 --- a/pkg/connectors/grafana/api.go +++ /dev/null @@ -1,76 +0,0 @@ -package grafana - -import "time" - -// https://raw.githubusercontent.com/grafana/grafana/main/pkg/services/ngalert/api/tooling/post.json - -type ruleResponse struct { - Status string `json:"status"` - Data ruleDiscovery `json:"data,omitempty"` -} - -type ruleDiscovery struct { - Groups []ruleGroup `json:"groups"` -} - -type ruleGroup struct { - Name string `json:"name"` - File string `json:"file"` - Rules []alertingRule `json:"rules"` -} - -type alertingRule struct { - State alertingState `json:"state"` - Name string `json:"name"` - ActiveAt string `json:"activeAt"` - Health string `json:"health"` - Annotations map[string]string `json:"annotations"` - Labels map[string]string `json:"labels,omitempty"` - Alerts []alert `json:"alerts,omitempty"` - Type string `json:"type"` -} - -type alert struct { - Labels map[string]string `json:"labels"` - Annotations map[string]string `json:"annotations"` - State string `json:"state"` - ActiveAt string `json:"activeAt"` - Value string `json:"value"` -} - -type alertingState = string - -const ( - alertingStatePending = "pending" - alertingStateFiring = "firing" - alertingStateInactive = "inactive" -) - -const ( - alertingStateAlerting = "alerting" - alertingStateNoData = "nodata" - alertingStateNormal = "normal" - alertingStateError = "error" -) - -// https://grafana.com/docs/grafana/latest/developers/http_api/alerting_provisioning/#provisioned-alert-rules - -type provisionedAlertRule struct { - Annotations map[string]string `json:"annotations"` - Condition string `json:"condition"` - ExecErrState string `json:"execErrState"` - Uid int64 `json:"id"` - IsPaused bool `json:"isPaused"` - Labels map[string]string `json:"labels"` - NoDataState string `json:"noDataState"` - For time.Duration `json:"for"` - Title string `json:"title"` - RuleGroup string `json:"ruleGroup"` -} - -const ( - noDataStateNoData = "NoData" - noDataStateOk = "OK" - execErrStateAlerting = "Alerting" - execErrStateError = "Error" -) diff --git a/pkg/connectors/grafana/connector.go b/pkg/connectors/grafana/connector.go index 35a61d6..412f2f2 100644 --- a/pkg/connectors/grafana/connector.go +++ b/pkg/connectors/grafana/connector.go @@ -1,24 +1,18 @@ package grafana import ( - "bytes" "context" - "encoding/json" "fmt" html "html/template" - "io" - "log/slog" - "net/http" - "strings" - "time" "github.com/synyx/tuwat/pkg/connectors" + "github.com/synyx/tuwat/pkg/connectors/alertmanager" "github.com/synyx/tuwat/pkg/connectors/common" ) type Connector struct { config Config - client *http.Client + ac *alertmanager.Connector } type Config struct { @@ -28,7 +22,14 @@ type Config struct { } func NewConnector(cfg *Config) *Connector { - c := &Connector{config: *cfg, client: cfg.HTTPConfig.Client()} + alertmanagerConfig := &alertmanager.Config{ + Tag: cfg.Tag, + Cluster: cfg.Cluster, + HTTPConfig: cfg.HTTPConfig, + } + alertmanagerConfig.URL += "/api/alertmanager/grafana" + + c := &Connector{config: *cfg, ac: alertmanager.NewConnector(alertmanagerConfig)} return c } @@ -38,39 +39,27 @@ func (c *Connector) Tag() string { } func (c *Connector) Collect(ctx context.Context) ([]connectors.Alert, error) { - sourceAlertGroups, err := c.collectAlerts(ctx) + sourceAlerts, err := c.ac.Collect(ctx) if err != nil { return nil, err } var alerts []connectors.Alert - for _, sourceAlertGroup := range sourceAlertGroups { - rule := sourceAlertGroup.Rules[0] - sourceAlert := rule.Alerts[0] - - state := grafanaStateToState(sourceAlert.State) - if state == connectors.OK { - continue - } - + for _, alert := range sourceAlerts { + alert.Description = alert.Labels["rulename"] + alert.Details = alert.Labels["message"] labels := map[string]string{ - "Hostname": sourceAlert.Labels["grafana_folder"], - "Folder": sourceAlert.Labels["grafana_folder"], - "Alertname": sourceAlert.Labels["alertname"], - "Contacts": sourceAlert.Labels["__contacts__"], + "Hostname": alert.Labels["grafana_folder"], + "Contacts": alert.Labels["__contacts__"], + } + for k, v := range labels { + alert.Labels[k] = v } - alert := connectors.Alert{ - Labels: labels, - Start: parseTime(sourceAlert.ActiveAt), - State: state, - Description: rule.Name, - Details: rule.Annotations["message"], - Links: []html.HTML{ - html.HTML("🏠"), - html.HTML("🏠"), - }, + alert.Links = []html.HTML{ + html.HTML("🏠"), + html.HTML("🏠"), } alerts = append(alerts, alert) @@ -79,69 +68,6 @@ func (c *Connector) Collect(ctx context.Context) ([]connectors.Alert, error) { return alerts, nil } -func grafanaStateToState(state string) connectors.State { - switch strings.ToLower(state) { - case alertingStateAlerting: - return connectors.Critical - case alertingStateNoData: - return connectors.Warning - default: - return connectors.OK - } -} - func (c *Connector) String() string { return fmt.Sprintf("Grafana (%s)", c.config.URL) } - -func (c *Connector) collectAlerts(ctx context.Context) ([]ruleGroup, error) { - res, err := c.get(ctx, "/api/prometheus/grafana/api/v1/rules") - if err != nil { - return nil, err - } - defer res.Body.Close() - - b, _ := io.ReadAll(res.Body) - buf := bytes.NewBuffer(b) - - decoder := json.NewDecoder(buf) - - var response ruleResponse - err = decoder.Decode(&response) - if err != nil { - slog.ErrorContext(ctx, "Cannot parse", - slog.String("url", c.config.URL), - slog.String("data", buf.String()), - slog.Any("status", res.StatusCode), - slog.Any("error", err)) - return nil, err - } - - return response.Data.Groups, nil -} - -func (c *Connector) get(ctx context.Context, endpoint string) (*http.Response, error) { - - slog.DebugContext(ctx, "getting alerts", slog.String("url", c.config.URL+endpoint)) - - req, err := http.NewRequestWithContext(ctx, http.MethodGet, c.config.URL+endpoint, nil) - if err != nil { - return nil, err - } - - req.Header.Set("Accept", "application/json") - - res, err := c.client.Do(req) - if err != nil { - return nil, err - } - - return res, nil -} -func parseTime(timeField string) time.Time { - t, err := time.Parse("2006-01-02T15:04:05.999-07:00", timeField) - if err != nil { - return time.Time{} - } - return t -} From 746c0f2c924bcc328dca68582b98eb49e4d2ceb3 Mon Sep 17 00:00:00 2001 From: Jonathan Buch Date: Sun, 1 Sep 2024 14:11:40 +0200 Subject: [PATCH 6/8] Revert "grafana, use alertmanager v2 alerts api" * the prometheus v1 API rules at least provides the current state, as opposed to: * /api/ruler/grafana/api/v1/rules/ * /api/alertmanager/grafana/api/v2/alerts * /api/prometheus/grafana/api/v1/alerts * /api/v1/provisioning/alert-rules * /api/alerts * This reverts commit a2214be2e1fb66d17d5a0ebddbec1b4eb6fb2e31. --- pkg/connectors/grafana/api.go | 76 ++++++++++++++++++ pkg/connectors/grafana/connector.go | 118 ++++++++++++++++++++++------ 2 files changed, 172 insertions(+), 22 deletions(-) create mode 100644 pkg/connectors/grafana/api.go diff --git a/pkg/connectors/grafana/api.go b/pkg/connectors/grafana/api.go new file mode 100644 index 0000000..85b63f2 --- /dev/null +++ b/pkg/connectors/grafana/api.go @@ -0,0 +1,76 @@ +package grafana + +import "time" + +// https://raw.githubusercontent.com/grafana/grafana/main/pkg/services/ngalert/api/tooling/post.json + +type ruleResponse struct { + Status string `json:"status"` + Data ruleDiscovery `json:"data,omitempty"` +} + +type ruleDiscovery struct { + Groups []ruleGroup `json:"groups"` +} + +type ruleGroup struct { + Name string `json:"name"` + File string `json:"file"` + Rules []alertingRule `json:"rules"` +} + +type alertingRule struct { + State alertingState `json:"state"` + Name string `json:"name"` + ActiveAt string `json:"activeAt"` + Health string `json:"health"` + Annotations map[string]string `json:"annotations"` + Labels map[string]string `json:"labels,omitempty"` + Alerts []alert `json:"alerts,omitempty"` + Type string `json:"type"` +} + +type alert struct { + Labels map[string]string `json:"labels"` + Annotations map[string]string `json:"annotations"` + State string `json:"state"` + ActiveAt string `json:"activeAt"` + Value string `json:"value"` +} + +type alertingState = string + +const ( + alertingStatePending = "pending" + alertingStateFiring = "firing" + alertingStateInactive = "inactive" +) + +const ( + alertingStateAlerting = "alerting" + alertingStateNoData = "nodata" + alertingStateNormal = "normal" + alertingStateError = "error" +) + +// https://grafana.com/docs/grafana/latest/developers/http_api/alerting_provisioning/#provisioned-alert-rules + +type provisionedAlertRule struct { + Annotations map[string]string `json:"annotations"` + Condition string `json:"condition"` + ExecErrState string `json:"execErrState"` + Uid int64 `json:"id"` + IsPaused bool `json:"isPaused"` + Labels map[string]string `json:"labels"` + NoDataState string `json:"noDataState"` + For time.Duration `json:"for"` + Title string `json:"title"` + RuleGroup string `json:"ruleGroup"` +} + +const ( + noDataStateNoData = "NoData" + noDataStateOk = "OK" + execErrStateAlerting = "Alerting" + execErrStateError = "Error" +) diff --git a/pkg/connectors/grafana/connector.go b/pkg/connectors/grafana/connector.go index 412f2f2..35a61d6 100644 --- a/pkg/connectors/grafana/connector.go +++ b/pkg/connectors/grafana/connector.go @@ -1,18 +1,24 @@ package grafana import ( + "bytes" "context" + "encoding/json" "fmt" html "html/template" + "io" + "log/slog" + "net/http" + "strings" + "time" "github.com/synyx/tuwat/pkg/connectors" - "github.com/synyx/tuwat/pkg/connectors/alertmanager" "github.com/synyx/tuwat/pkg/connectors/common" ) type Connector struct { config Config - ac *alertmanager.Connector + client *http.Client } type Config struct { @@ -22,14 +28,7 @@ type Config struct { } func NewConnector(cfg *Config) *Connector { - alertmanagerConfig := &alertmanager.Config{ - Tag: cfg.Tag, - Cluster: cfg.Cluster, - HTTPConfig: cfg.HTTPConfig, - } - alertmanagerConfig.URL += "/api/alertmanager/grafana" - - c := &Connector{config: *cfg, ac: alertmanager.NewConnector(alertmanagerConfig)} + c := &Connector{config: *cfg, client: cfg.HTTPConfig.Client()} return c } @@ -39,27 +38,39 @@ func (c *Connector) Tag() string { } func (c *Connector) Collect(ctx context.Context) ([]connectors.Alert, error) { - sourceAlerts, err := c.ac.Collect(ctx) + sourceAlertGroups, err := c.collectAlerts(ctx) if err != nil { return nil, err } var alerts []connectors.Alert - for _, alert := range sourceAlerts { - alert.Description = alert.Labels["rulename"] - alert.Details = alert.Labels["message"] - labels := map[string]string{ - "Hostname": alert.Labels["grafana_folder"], - "Contacts": alert.Labels["__contacts__"], + for _, sourceAlertGroup := range sourceAlertGroups { + rule := sourceAlertGroup.Rules[0] + sourceAlert := rule.Alerts[0] + + state := grafanaStateToState(sourceAlert.State) + if state == connectors.OK { + continue } - for k, v := range labels { - alert.Labels[k] = v + + labels := map[string]string{ + "Hostname": sourceAlert.Labels["grafana_folder"], + "Folder": sourceAlert.Labels["grafana_folder"], + "Alertname": sourceAlert.Labels["alertname"], + "Contacts": sourceAlert.Labels["__contacts__"], } - alert.Links = []html.HTML{ - html.HTML("🏠"), - html.HTML("🏠"), + alert := connectors.Alert{ + Labels: labels, + Start: parseTime(sourceAlert.ActiveAt), + State: state, + Description: rule.Name, + Details: rule.Annotations["message"], + Links: []html.HTML{ + html.HTML("🏠"), + html.HTML("🏠"), + }, } alerts = append(alerts, alert) @@ -68,6 +79,69 @@ func (c *Connector) Collect(ctx context.Context) ([]connectors.Alert, error) { return alerts, nil } +func grafanaStateToState(state string) connectors.State { + switch strings.ToLower(state) { + case alertingStateAlerting: + return connectors.Critical + case alertingStateNoData: + return connectors.Warning + default: + return connectors.OK + } +} + func (c *Connector) String() string { return fmt.Sprintf("Grafana (%s)", c.config.URL) } + +func (c *Connector) collectAlerts(ctx context.Context) ([]ruleGroup, error) { + res, err := c.get(ctx, "/api/prometheus/grafana/api/v1/rules") + if err != nil { + return nil, err + } + defer res.Body.Close() + + b, _ := io.ReadAll(res.Body) + buf := bytes.NewBuffer(b) + + decoder := json.NewDecoder(buf) + + var response ruleResponse + err = decoder.Decode(&response) + if err != nil { + slog.ErrorContext(ctx, "Cannot parse", + slog.String("url", c.config.URL), + slog.String("data", buf.String()), + slog.Any("status", res.StatusCode), + slog.Any("error", err)) + return nil, err + } + + return response.Data.Groups, nil +} + +func (c *Connector) get(ctx context.Context, endpoint string) (*http.Response, error) { + + slog.DebugContext(ctx, "getting alerts", slog.String("url", c.config.URL+endpoint)) + + req, err := http.NewRequestWithContext(ctx, http.MethodGet, c.config.URL+endpoint, nil) + if err != nil { + return nil, err + } + + req.Header.Set("Accept", "application/json") + + res, err := c.client.Do(req) + if err != nil { + return nil, err + } + + return res, nil +} +func parseTime(timeField string) time.Time { + t, err := time.Parse("2006-01-02T15:04:05.999-07:00", timeField) + if err != nil { + return time.Time{} + } + return t +} From 626018dba3367683d0223a0ce8e09da697ca40d5 Mon Sep 17 00:00:00 2001 From: Jonathan Buch Date: Sun, 1 Sep 2024 14:36:10 +0200 Subject: [PATCH 7/8] grafana, simplify testing code --- pkg/connectors/grafana/connector_test.go | 33 +++--------------------- 1 file changed, 4 insertions(+), 29 deletions(-) diff --git a/pkg/connectors/grafana/connector_test.go b/pkg/connectors/grafana/connector_test.go index fecc1b1..b800597 100644 --- a/pkg/connectors/grafana/connector_test.go +++ b/pkg/connectors/grafana/connector_test.go @@ -4,7 +4,6 @@ import ( "context" "net/http" "net/http/httptest" - "regexp" "strings" "testing" @@ -12,8 +11,8 @@ import ( ) func TestConnector(t *testing.T) { - connector, mockServer := testConnector(map[string][]string{ - "/api/prometheus/grafana/api/v1/rules": {mockResponse}, + connector, mockServer := testConnector(map[string]string{ + "/api/prometheus/grafana/api/v1/rules": mockResponse, }) defer func() { mockServer.Close() }() @@ -27,24 +26,12 @@ func TestConnector(t *testing.T) { } } -// testConnector builds a connector with a mocked backend. -// Each usage of the backend server will return the next mocked body in order. -func testConnector(endpoints map[string][]string) (*Connector, *httptest.Server) { - calls := map[string]int{} - for k := range endpoints { - calls[k] = 0 - } - +func testConnector(endpoints map[string]string) (*Connector, *httptest.Server) { mockServer := httptest.NewServer(http.HandlerFunc(func(res http.ResponseWriter, req *http.Request) { res.WriteHeader(http.StatusOK) - for endpoint, bodies := range endpoints { + for endpoint, body := range endpoints { if strings.HasPrefix(req.URL.Path, endpoint) { - if calls[endpoint] >= len(bodies) { - panic("missing additional mock for endpoint " + endpoint) - } - body := bodies[calls[endpoint]] - calls[endpoint]++ if _, err := res.Write([]byte(body)); err != nil { panic(err) } @@ -131,15 +118,3 @@ const mockResponse = ` } } ` - -func TestConnector_Collect(t *testing.T) { - r := regexp.MustCompile(`in namespace\W+([a-zA-Z-0-9_-]+)`) - details := "constraint violation of kind ContainerLimits in Pod gitlab-agent-landingpage-659cf9567d-kkxsl in namespace api-gateway-stage\n\t\t" - where := "" - if s := r.FindAllStringSubmatch(details, 1); len(s) > 0 { - where = s[0][1] - } - if where != "api-gateway-stage" { - t.Fail() - } -} From 01843792b12c171ffb8659af722417ef59b6cbd3 Mon Sep 17 00:00:00 2001 From: Jonathan Buch Date: Sun, 1 Sep 2024 15:02:32 +0200 Subject: [PATCH 8/8] grafana, revise state handling --- pkg/connectors/grafana/api.go | 46 ++++++++--------------------- pkg/connectors/grafana/connector.go | 17 +++++++++++ 2 files changed, 29 insertions(+), 34 deletions(-) diff --git a/pkg/connectors/grafana/api.go b/pkg/connectors/grafana/api.go index 85b63f2..a7c1ccb 100644 --- a/pkg/connectors/grafana/api.go +++ b/pkg/connectors/grafana/api.go @@ -1,8 +1,7 @@ package grafana -import "time" - // https://raw.githubusercontent.com/grafana/grafana/main/pkg/services/ngalert/api/tooling/post.json +// https://prometheus.io/docs/prometheus/latest/querying/api/#rules type ruleResponse struct { Status string `json:"status"` @@ -20,10 +19,9 @@ type ruleGroup struct { } type alertingRule struct { - State alertingState `json:"state"` + State alertingRuleState `json:"state"` Name string `json:"name"` ActiveAt string `json:"activeAt"` - Health string `json:"health"` Annotations map[string]string `json:"annotations"` Labels map[string]string `json:"labels,omitempty"` Alerts []alert `json:"alerts,omitempty"` @@ -33,44 +31,24 @@ type alertingRule struct { type alert struct { Labels map[string]string `json:"labels"` Annotations map[string]string `json:"annotations"` - State string `json:"state"` + State alertingState `json:"state"` ActiveAt string `json:"activeAt"` Value string `json:"value"` } -type alertingState = string - -const ( - alertingStatePending = "pending" - alertingStateFiring = "firing" - alertingStateInactive = "inactive" -) +type alertingRuleState = string const ( - alertingStateAlerting = "alerting" - alertingStateNoData = "nodata" - alertingStateNormal = "normal" - alertingStateError = "error" + alertingStateFiring alertingRuleState = "firing" + alertingStatePending alertingRuleState = "pending" + alertingStateInactive alertingRuleState = "inactive" ) -// https://grafana.com/docs/grafana/latest/developers/http_api/alerting_provisioning/#provisioned-alert-rules - -type provisionedAlertRule struct { - Annotations map[string]string `json:"annotations"` - Condition string `json:"condition"` - ExecErrState string `json:"execErrState"` - Uid int64 `json:"id"` - IsPaused bool `json:"isPaused"` - Labels map[string]string `json:"labels"` - NoDataState string `json:"noDataState"` - For time.Duration `json:"for"` - Title string `json:"title"` - RuleGroup string `json:"ruleGroup"` -} +type alertingState = string const ( - noDataStateNoData = "NoData" - noDataStateOk = "OK" - execErrStateAlerting = "Alerting" - execErrStateError = "Error" + alertingStateAlerting alertingState = "alerting" + alertingStateNoData alertingState = "nodata" + alertingStateNormal alertingState = "normal" + alertingStateError alertingState = "error" ) diff --git a/pkg/connectors/grafana/connector.go b/pkg/connectors/grafana/connector.go index 35a61d6..cb592a5 100644 --- a/pkg/connectors/grafana/connector.go +++ b/pkg/connectors/grafana/connector.go @@ -47,6 +47,19 @@ func (c *Connector) Collect(ctx context.Context) ([]connectors.Alert, error) { for _, sourceAlertGroup := range sourceAlertGroups { rule := sourceAlertGroup.Rules[0] + switch rule.State { + case alertingStateInactive: + continue + case alertingStatePending: + fallthrough + case alertingStateFiring: + // ok + } + + if rule.Type != "alerting" { + continue + } + sourceAlert := rule.Alerts[0] state := grafanaStateToState(sourceAlert.State) @@ -84,7 +97,11 @@ func grafanaStateToState(state string) connectors.State { case alertingStateAlerting: return connectors.Critical case alertingStateNoData: + return connectors.Unknown + case alertingStateError: return connectors.Warning + case alertingStateNormal: + return connectors.OK default: return connectors.OK }