diff --git a/CHANGELOG.md b/CHANGELOG.md index 87e9decfc3..8753b5feff 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,7 +18,7 @@ We use _breaking :warning:_ to mark changes that are not backward compatible (re - [#4171](https://github.com/thanos-io/thanos/pull/4171) Docker: Busybox image updated to latest (1.33.1) - [#4175](https://github.com/thanos-io/thanos/pull/4175) Added Tag Configuration Support Lightstep Tracing - [#4176](https://github.com/thanos-io/thanos/pull/4176) Query API: Adds optional `Stats param` to return stats for query APIs - +- [#4125](https://github.com/thanos-io/thanos/pull/4125) Rule: Add `--alert.relabel-config` / `--alert.relabel-config-file` allowing to specify alert relabel configurations like [Prometheus](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config) ### Fixed - ### Changed diff --git a/cmd/thanos/config.go b/cmd/thanos/config.go index b785f67e09..b9b5b7fcfe 100644 --- a/cmd/thanos/config.go +++ b/cmd/thanos/config.go @@ -202,6 +202,7 @@ type alertMgrConfig struct { alertmgrsDNSSDInterval time.Duration alertExcludeLabels []string alertQueryURL *string + alertRelabelConfigPath *extflag.PathOrContent } func (ac *alertMgrConfig) registerFlag(cmd extflag.FlagClause) *alertMgrConfig { @@ -215,5 +216,6 @@ func (ac *alertMgrConfig) registerFlag(cmd extflag.FlagClause) *alertMgrConfig { ac.alertQueryURL = cmd.Flag("alert.query-url", "The external Thanos Query URL that would be set in all alerts 'Source' field").String() cmd.Flag("alert.label-drop", "Labels by name to drop before sending to alertmanager. This allows alert to be deduplicated on replica label (repeated). Similar Prometheus alert relabelling"). 
StringsVar(&ac.alertExcludeLabels) + ac.alertRelabelConfigPath = extflag.RegisterPathOrContent(cmd, "alert.relabel-config", "YAML file that contains alert relabelling configuration.", false) return ac } diff --git a/cmd/thanos/rule.go b/cmd/thanos/rule.go index a5e57f305c..6ab0c34624 100644 --- a/cmd/thanos/rule.go +++ b/cmd/thanos/rule.go @@ -27,6 +27,7 @@ import ( "github.com/prometheus/common/model" "github.com/prometheus/common/route" "github.com/prometheus/prometheus/pkg/labels" + "github.com/prometheus/prometheus/pkg/relabel" "github.com/prometheus/prometheus/promql" "github.com/prometheus/prometheus/rules" "github.com/prometheus/prometheus/tsdb" @@ -69,9 +70,10 @@ type ruleConfig struct { query queryConfig queryConfigYAML []byte - alertmgr alertMgrConfig - alertmgrsConfigYAML []byte - alertQueryURL *url.URL + alertmgr alertMgrConfig + alertmgrsConfigYAML []byte + alertQueryURL *url.URL + alertRelabelConfigYAML []byte resendDelay time.Duration evalInterval time.Duration @@ -172,6 +174,11 @@ func registerRule(app *extkingpin.App) { return errors.New("--alertmanagers.url and --alertmanagers.config* parameters cannot be defined at the same time") } + conf.alertRelabelConfigYAML, err = conf.alertmgr.alertRelabelConfigPath.Content() + if err != nil { + return err + } + httpLogOpts, err := logging.ParseHTTPOptions(*reqLogDecision, reqLogConfig) if err != nil { return errors.Wrap(err, "error while parsing config for request logging") @@ -352,6 +359,14 @@ func runRule( level.Warn(logger).Log("msg", "no alertmanager configured") } + var alertRelabelConfigs []*relabel.Config + if len(conf.alertRelabelConfigYAML) > 0 { + alertRelabelConfigs, err = alert.LoadRelabelConfigs(conf.alertRelabelConfigYAML) + if err != nil { + return err + } + } + amProvider := dns.NewProvider( logger, extprom.WrapRegistererWithPrefix("thanos_rule_alertmanagers_", reg), @@ -377,7 +392,7 @@ func runRule( var ( ruleMgr *thanosrules.Manager - alertQ = alert.NewQueue(logger, reg, 10000, 100, 
labelsTSDBToProm(conf.lset), conf.alertmgr.alertExcludeLabels) + alertQ = alert.NewQueue(logger, reg, 10000, 100, labelsTSDBToProm(conf.lset), conf.alertmgr.alertExcludeLabels, alertRelabelConfigs) ) { // Run rule evaluation and alert notifications. diff --git a/docs/components/rule.md b/docs/components/rule.md index 4911870d7c..daac8409c3 100644 --- a/docs/components/rule.md +++ b/docs/components/rule.md @@ -244,6 +244,13 @@ Flags: --alert.query-url=ALERT.QUERY-URL The external Thanos Query URL that would be set in all alerts 'Source' field + --alert.relabel-config= + Alternative to 'alert.relabel-config-file' flag + (mutually exclusive). Content of YAML file that + contains alert relabelling configuration. + --alert.relabel-config-file= + Path to YAML file that contains alert + relabelling configuration. --alertmanagers.config= Alternative to 'alertmanagers.config-file' flag (mutually exclusive). Content of YAML file that diff --git a/pkg/alert/alert.go b/pkg/alert/alert.go index 59dd4c83f5..0258a4d2fa 100644 --- a/pkg/alert/alert.go +++ b/pkg/alert/alert.go @@ -24,6 +24,7 @@ import ( "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" "github.com/prometheus/prometheus/pkg/labels" + "github.com/prometheus/prometheus/pkg/relabel" "go.uber.org/atomic" "github.com/thanos-io/thanos/pkg/runutil" @@ -85,11 +86,12 @@ func (a *Alert) ResolvedAt(ts time.Time) bool { // Queue is a queue of alert notifications waiting to be sent. The queue is consumed in batches // and entries are dropped at the front if it runs full. 
type Queue struct { - logger log.Logger - maxBatchSize int - capacity int - toAddLset labels.Labels - toExcludeLabels labels.Labels + logger log.Logger + maxBatchSize int + capacity int + toAddLset labels.Labels + toExcludeLabels labels.Labels + alertRelabelConfigs []*relabel.Config mtx sync.Mutex queue []*Alert @@ -120,19 +122,20 @@ func relabelLabels(lset labels.Labels, excludeLset []string) (toAdd labels.Label // NewQueue returns a new queue. The given label set is attached to all alerts pushed to the queue. // The given exclude label set tells what label names to drop including external labels. -func NewQueue(logger log.Logger, reg prometheus.Registerer, capacity, maxBatchSize int, externalLset labels.Labels, excludeLabels []string) *Queue { +func NewQueue(logger log.Logger, reg prometheus.Registerer, capacity, maxBatchSize int, externalLset labels.Labels, excludeLabels []string, alertRelabelConfigs []*relabel.Config) *Queue { toAdd, toExclude := relabelLabels(externalLset, excludeLabels) if logger == nil { logger = log.NewNopLogger() } q := &Queue{ - logger: logger, - capacity: capacity, - morec: make(chan struct{}, 1), - maxBatchSize: maxBatchSize, - toAddLset: toAdd, - toExcludeLabels: toExclude, + logger: logger, + capacity: capacity, + morec: make(chan struct{}, 1), + maxBatchSize: maxBatchSize, + toAddLset: toAdd, + toExcludeLabels: toExclude, + alertRelabelConfigs: alertRelabelConfigs, dropped: promauto.With(reg).NewCounter(prometheus.CounterOpts{ Name: "thanos_alert_queue_alerts_dropped_total", @@ -214,7 +217,6 @@ func (q *Queue) Push(alerts []*Alert) { q.pushed.Add(float64(len(alerts))) // Attach external labels and drop excluded labels before sending. - // TODO(bwplotka): User proper relabelling with https://github.com/thanos-io/thanos/issues/660. 
for _, a := range alerts { lb := labels.NewBuilder(labels.Labels{}) for _, l := range a.Labels { @@ -226,7 +228,7 @@ func (q *Queue) Push(alerts []*Alert) { for _, l := range q.toAddLset { lb.Set(l.Name, l.Value) } - a.Labels = lb.Labels() + a.Labels = relabel.Process(lb.Labels(), q.alertRelabelConfigs...) } // Queue capacity should be significantly larger than a single alert diff --git a/pkg/alert/alert_test.go b/pkg/alert/alert_test.go index 67f280280d..365b287ad9 100644 --- a/pkg/alert/alert_test.go +++ b/pkg/alert/alert_test.go @@ -13,7 +13,9 @@ import ( "time" "github.com/pkg/errors" + "github.com/prometheus/common/model" "github.com/prometheus/prometheus/pkg/labels" + "github.com/prometheus/prometheus/pkg/relabel" promtestutil "github.com/prometheus/client_golang/prometheus/testutil" "github.com/thanos-io/thanos/pkg/testutil" @@ -24,9 +26,7 @@ func TestQueue_Pop_all_Pushed(t *testing.T) { batchsize := 1 pushes := 3 - q := NewQueue( - nil, nil, qcapacity, batchsize, nil, nil, - ) + q := NewQueue(nil, nil, qcapacity, batchsize, nil, nil, nil) for i := 0; i < pushes; i++ { q.Push([]*Alert{ {}, @@ -45,11 +45,7 @@ func TestQueue_Pop_all_Pushed(t *testing.T) { } func TestQueue_Push_Relabelled(t *testing.T) { - q := NewQueue( - nil, nil, 10, 10, - labels.FromStrings("a", "1", "replica", "A"), // Labels to be added. - []string{"b", "replica"}, // Labels to be dropped (excluding those added). 
-	)
+	q := NewQueue(nil, nil, 10, 10, labels.FromStrings("a", "1", "replica", "A"), []string{"b", "replica"}, nil)
 
 	q.Push([]*Alert{
 		{Labels: labels.FromStrings("b", "2", "c", "3")},
@@ -63,6 +59,37 @@ func TestQueue_Push_Relabelled(t *testing.T) {
 	testutil.Equals(t, labels.FromStrings("a", "1"), q.queue[2].Labels)
 }
 
+func TestQueue_Push_Relabelled_Alerts(t *testing.T) {
+	q := NewQueue(
+		nil, nil, 10, 10, labels.New(), []string{},
+		[]*relabel.Config{
+			{
+				SourceLabels: model.LabelNames{"a"},
+				Separator:    ";",
+				Regex:        relabel.MustNewRegexp(".*(b).*"),
+				TargetLabel:  "d",
+				Action:       relabel.Replace,
+				Replacement:  "$1",
+			},
+		},
+	)
+
+	q.Push([]*Alert{
+		{Labels: labels.FromMap(map[string]string{
+			"a": "abc",
+		})},
+	})
+
+	testutil.Equals(t, 1, len(q.queue))
+	testutil.Equals(
+		t, labels.FromMap(map[string]string{
+			"a": "abc",
+			"d": "b",
+		}),
+		q.queue[0].Labels,
+	)
+}
+
 func assertSameHosts(t *testing.T, expected []*url.URL, found []*url.URL) {
 	testutil.Equals(t, len(expected), len(found))
 
diff --git a/pkg/alert/config.go b/pkg/alert/config.go
index b701b39292..b6d5c0b33e 100644
--- a/pkg/alert/config.go
+++ b/pkg/alert/config.go
@@ -12,6 +12,7 @@ import (
 
 	"github.com/pkg/errors"
 	"github.com/prometheus/common/model"
+	"github.com/prometheus/prometheus/pkg/relabel"
 	"gopkg.in/yaml.v2"
 
 	"github.com/thanos-io/thanos/pkg/discovery/dns"
@@ -130,3 +131,14 @@ func BuildAlertmanagerConfig(address string, timeout time.Duration) (Alertmanage
 		APIVersion: APIv1,
 	}, nil
 }
+
+// LoadRelabelConfigs decodes confYaml as a YAML list of relabel.Config entries.
+// Decoding uses yaml.UnmarshalStrict; any decoding failure is returned wrapped
+// with context so callers that pass the error through still report its origin.
+func LoadRelabelConfigs(confYaml []byte) ([]*relabel.Config, error) {
+	var cfg []*relabel.Config
+	if err := yaml.UnmarshalStrict(confYaml, &cfg); err != nil {
+		return nil, errors.Wrap(err, "parsing alert relabel configuration")
+	}
+	return cfg, nil
+}