Skip to content

Commit

Permalink
Add the receiver name to notification metrics (#3045)
Browse files Browse the repository at this point in the history
* Add receiver name as a label to notify metrics

This commit adds a second label, the receiver name, to the notify family of
metrics (e.g. numTotalFailedNotifications). This makes it possible to tell
which receiver is failing when many receivers share the same integration
type.

Signed-off-by: sinkingpoint <[email protected]>

* Gate receiver names behind a feature flag

Signed-off-by: sinkingpoint <[email protected]>

---------

Signed-off-by: sinkingpoint <[email protected]>
Signed-off-by: gotjosh <[email protected]>
Co-authored-by: gotjosh <[email protected]>
  • Loading branch information
sinkingpoint and gotjosh authored Sep 6, 2023
1 parent 6ce841c commit cfe4411
Show file tree
Hide file tree
Showing 6 changed files with 240 additions and 49 deletions.
15 changes: 12 additions & 3 deletions cmd/alertmanager/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ import (
"github.com/prometheus/alertmanager/cluster"
"github.com/prometheus/alertmanager/config"
"github.com/prometheus/alertmanager/dispatch"
"github.com/prometheus/alertmanager/featurecontrol"
"github.com/prometheus/alertmanager/inhibit"
"github.com/prometheus/alertmanager/nflog"
"github.com/prometheus/alertmanager/notify"
Expand Down Expand Up @@ -142,7 +143,7 @@ func buildReceiverIntegrations(nc config.Receiver, tmpl *template.Template, logg
errs.Add(err)
return
}
integrations = append(integrations, notify.NewIntegration(n, rs, name, i))
integrations = append(integrations, notify.NewIntegration(n, rs, name, i, nc.Name))
}
)

Expand Down Expand Up @@ -231,6 +232,7 @@ func run() int {
tlsConfigFile = kingpin.Flag("cluster.tls-config", "[EXPERIMENTAL] Path to config yaml file that can enable mutual TLS within the gossip protocol.").Default("").String()
allowInsecureAdvertise = kingpin.Flag("cluster.allow-insecure-public-advertise-address-discovery", "[EXPERIMENTAL] Allow alertmanager to discover and listen on a public IP address.").Bool()
label = kingpin.Flag("cluster.label", "The cluster label is an optional string to include on each packet and stream. It uniquely identifies the cluster and prevents cross-communication issues when sending gossip messages.").Default("").String()
featureFlags = kingpin.Flag("enable-feature", fmt.Sprintf("Experimental features to enable. The flag can be repeated to enable multiple features. Valid options: %s", strings.Join(featurecontrol.AllowedFlags, ", "))).Default("").String()
)

promlogflag.AddFlags(kingpin.CommandLine, &promlogConfig)
Expand All @@ -245,7 +247,13 @@ func run() int {
level.Info(logger).Log("msg", "Starting Alertmanager", "version", version.Info())
level.Info(logger).Log("build_context", version.BuildContext())

err := os.MkdirAll(*dataDir, 0o777)
featureConfig, err := featurecontrol.NewFlags(logger, *featureFlags)
if err != nil {
level.Error(logger).Log("msg", "error parsing the feature flag list", "err", err)
return 1
}

err = os.MkdirAll(*dataDir, 0o777)
if err != nil {
level.Error(logger).Log("msg", "Unable to create data directory", "err", err)
return 1
Expand Down Expand Up @@ -421,7 +429,7 @@ func run() int {
)

dispMetrics := dispatch.NewDispatcherMetrics(false, prometheus.DefaultRegisterer)
pipelineBuilder := notify.NewPipelineBuilder(prometheus.DefaultRegisterer)
pipelineBuilder := notify.NewPipelineBuilder(prometheus.DefaultRegisterer, featureConfig)
configLogger := log.With(logger, "component", "configuration")
configCoordinator := config.NewCoordinator(
*configFile,
Expand Down Expand Up @@ -493,6 +501,7 @@ func run() int {
notificationLog,
pipelinePeer,
)

configuredReceivers.Set(float64(len(activeReceivers)))
configuredIntegrations.Set(float64(integrationsNum))

Expand Down
4 changes: 2 additions & 2 deletions cmd/alertmanager/main_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,8 @@ func TestBuildReceiverIntegrations(t *testing.T) {
},
},
exp: []notify.Integration{
notify.NewIntegration(nil, sendResolved(false), "webhook", 0),
notify.NewIntegration(nil, sendResolved(true), "webhook", 1),
notify.NewIntegration(nil, sendResolved(false), "webhook", 0, "foo"),
notify.NewIntegration(nil, sendResolved(true), "webhook", 1, "foo"),
},
},
{
Expand Down
78 changes: 78 additions & 0 deletions featurecontrol/featurecontrol.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
// Copyright 2023 Prometheus Team
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package featurecontrol

import (
"fmt"
"strings"

"github.com/go-kit/log"
"github.com/go-kit/log/level"
)

// fcReceiverNameInMetrics is the feature name that, when passed to NewFlags,
// turns on EnableReceiverNamesInMetrics.
const fcReceiverNameInMetrics = "receiver-name-in-metrics"

// AllowedFlags lists every feature name that NewFlags accepts.
var AllowedFlags = []string{
	fcReceiverNameInMetrics,
}

// Flagger exposes the state of each experimental feature as a boolean query.
type Flagger interface {
	// EnableReceiverNamesInMetrics reports whether the
	// "receiver-name-in-metrics" feature has been switched on.
	EnableReceiverNamesInMetrics() bool
}

// Flags is the Flagger implementation returned by NewFlags when at least one
// feature name was supplied. It records which features were requested.
type Flags struct {
	// logger is the logger handed to NewFlags.
	logger log.Logger
	// enableReceiverNamesInMetrics is set when the
	// "receiver-name-in-metrics" feature was requested.
	enableReceiverNamesInMetrics bool
}

// EnableReceiverNamesInMetrics reports whether the "receiver-name-in-metrics"
// feature was requested.
func (flags *Flags) EnableReceiverNamesInMetrics() bool {
	enabled := flags.enableReceiverNamesInMetrics
	return enabled
}

// flagOption mutates a Flags value to switch a single feature on.
type flagOption func(flags *Flags)

// enableReceiverNameInMetrics returns the option that turns on the
// "receiver-name-in-metrics" feature.
func enableReceiverNameInMetrics() flagOption {
	var enable flagOption = func(f *Flags) {
		f.enableReceiverNamesInMetrics = true
	}
	return enable
}

// NewFlags parses a comma-separated list of feature names and returns a
// Flagger describing which experimental features are enabled.
//
// An empty features string yields NoopFlags, which reports every feature as
// disabled. Whitespace surrounding each entry is ignored, so a list written
// as "feature-a, feature-b" is accepted. Any entry that is not a known
// feature name (including an empty entry, e.g. one produced by a trailing
// comma) results in a nil Flagger and an error naming the offending entry.
//
// A warning is logged on logger for every experimental feature that is
// enabled.
func NewFlags(logger log.Logger, features string) (Flagger, error) {
	if len(features) == 0 {
		return NoopFlags{}, nil
	}

	var opts []flagOption
	for _, feature := range strings.Split(features, ",") {
		// Tolerate spaces around the separating commas ("a, b") instead of
		// rejecting the padded name as an unknown feature.
		feature = strings.TrimSpace(feature)

		switch feature {
		case fcReceiverNameInMetrics:
			opts = append(opts, enableReceiverNameInMetrics())
			level.Warn(logger).Log("msg", "Experimental receiver name in metrics enabled")
		default:
			// The message text is part of the observable behaviour: the
			// package tests compare it verbatim.
			return nil, fmt.Errorf("Unknown option '%s' for --enable-feature", feature)
		}
	}

	// Options are applied only after the whole list has been validated.
	fc := &Flags{logger: logger}
	for _, opt := range opts {
		opt(fc)
	}

	return fc, nil
}

// NoopFlags is a Flagger on which every feature is disabled.
type NoopFlags struct{}

// EnableReceiverNamesInMetrics always reports false.
func (NoopFlags) EnableReceiverNamesInMetrics() bool {
	return false
}
58 changes: 58 additions & 0 deletions featurecontrol/featurecontrol_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
// Copyright 2023 Prometheus Team
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package featurecontrol

import (
"errors"
"strings"
"testing"

"github.com/go-kit/log"
"github.com/stretchr/testify/require"
)

// TestFlags checks that NewFlags accepts a known feature name and returns the
// expected error as soon as the list contains an unknown one.
func TestFlags(t *testing.T) {
	type testCase struct {
		name         string
		featureFlags string
		err          error
	}

	cases := []testCase{
		{
			name:         "with only valid feature flags",
			featureFlags: fcReceiverNameInMetrics,
		},
		{
			name:         "with only invalid feature flags",
			featureFlags: "somethingsomething",
			err:          errors.New("Unknown option 'somethingsomething' for --enable-feature"),
		},
		{
			name:         "with both, valid and invalid feature flags",
			featureFlags: strings.Join([]string{fcReceiverNameInMetrics, "somethingbad"}, ","),
			err:          errors.New("Unknown option 'somethingbad' for --enable-feature"),
		},
	}

	for _, c := range cases {
		t.Run(c.name, func(t *testing.T) {
			flags, err := NewFlags(log.NewNopLogger(), c.featureFlags)

			// Failure cases only need the error text to match.
			if c.err != nil {
				require.EqualError(t, err, c.err.Error())
				return
			}

			require.NoError(t, err)
			require.NotNil(t, flags)
		})
	}
}
Loading

0 comments on commit cfe4411

Please sign in to comment.