Skip to content

Commit

Permalink
Fixes the registration of the Alertmanager alert receiving API metrics (
Browse files Browse the repository at this point in the history
#3065)

* Remove TODO about prometheus/alertmanager#2182 as it got merged

Signed-off-by: gotjosh <[email protected]>

* Remove TODO about prometheus/alertmanager#2200 as it got merged

Signed-off-by: gotjosh <[email protected]>

* Register the Alertmanager API metrics

Signed-off-by: gotjosh <[email protected]>

* Add a changelog entry

Signed-off-by: gotjosh <[email protected]>

* Fix tests

Signed-off-by: gotjosh <[email protected]>
  • Loading branch information
gotjosh authored Aug 24, 2020
1 parent 2cb22f8 commit a87c25f
Show file tree
Hide file tree
Showing 5 changed files with 43 additions and 13 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
* [BUGFIX] Experimental blocks storage: Ingester is less likely to hit gRPC message size limit when streaming data to queriers. #3015
* [BUGFIX] Fix configuration for TLS server validation, TLS skip verify was hardcoded to true for all TLS configurations and prevented validation of server certificates. #3030
* [BUGFIX] Fixes the Alertmanager panicking when no `-alertmanager.web.external-url` is provided. #3017
* [BUGFIX] Fixes the registration of the Alertmanager API metrics `cortex_alertmanager_alerts_received_total` and `cortex_alertmanager_alerts_invalid_total`. #3065

## 1.3.0 / 2020-08-21

Expand Down
8 changes: 8 additions & 0 deletions integration/alertmanager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"context"
"testing"

"github.com/prometheus/common/model"
"github.com/prometheus/prometheus/pkg/labels"
"github.com/stretchr/testify/require"

Expand Down Expand Up @@ -95,6 +96,13 @@ func TestAlertmanagerStoreAPI(t *testing.T) {
require.Len(t, cfg.Receivers, 1)
require.Equal(t, "example_receiver", cfg.Receivers[0].Name)

err = c.SendAlertToAlermanager(context.Background(), &model.Alert{Labels: model.LabelSet{"foo": "bar"}})
require.NoError(t, err)

require.NoError(t, am.WaitSumMetricsWithOptions(e2e.Equals(1), []string{"cortex_alertmanager_alerts_received_total"},
e2e.WithLabelMatchers(labels.MustNewMatcher(labels.MatchEqual, "user", "user-1")),
e2e.WaitMissingMetrics))

err = c.DeleteAlertmanagerConfig(context.Background())
require.NoError(t, err)

Expand Down
27 changes: 27 additions & 0 deletions integration/e2ecortex/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import (
"github.com/gogo/protobuf/proto"
"github.com/golang/snappy"
alertConfig "github.com/prometheus/alertmanager/config"
"github.com/prometheus/alertmanager/types"
promapi "github.com/prometheus/client_golang/api"
promv1 "github.com/prometheus/client_golang/api/prometheus/v1"
"github.com/prometheus/common/model"
Expand Down Expand Up @@ -380,6 +381,32 @@ func (c *Client) DeleteAlertmanagerConfig(ctx context.Context) error {
return nil
}

// SendAlertToAlermanager sends alerts to the Alertmanager API
func (c *Client) SendAlertToAlermanager(ctx context.Context, alert *model.Alert) error {
u := c.alertmanagerClient.URL("/api/prom/api/v1/alerts", nil)

data, err := json.Marshal([]types.Alert{{Alert: *alert}})
if err != nil {
return fmt.Errorf("error marshaling the alert: %v", err)
}

req, err := http.NewRequest(http.MethodPost, u.String(), bytes.NewReader(data))
if err != nil {
return fmt.Errorf("error creating request: %v", err)
}

resp, body, err := c.alertmanagerClient.Do(ctx, req)
if err != nil {
return err
}

if resp.StatusCode != http.StatusOK {
return fmt.Errorf("sending alert failed with status %d and error %v", resp.StatusCode, string(body))
}

return nil
}

func (c *Client) PostRequest(url string, body io.Reader) (*http.Response, error) {
req, err := http.NewRequest("POST", url, body)
if err != nil {
Expand Down
16 changes: 7 additions & 9 deletions pkg/alertmanager/alertmanager.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,20 +67,16 @@ type Alertmanager struct {
mux *http.ServeMux
registry *prometheus.Registry

// The Dispatcher is the only component we need to recreate when we call ApplyConfig.
// Given its metrics don't have any variable labels we need to re-use the same metrics.
dispatcherMetrics *dispatch.DispatcherMetrics

activeMtx sync.Mutex
active bool
}

var (
webReload = make(chan chan error)

// In order to workaround a bug in the alertmanager, which doesn't register the
// metrics in the input registry but to the global default one, we do define a
// singleton dispatcher metrics instance that is going to be shared across all
// tenants alertmanagers.
// TODO change this once the vendored alertmanager will have this PR merged into:
// https://github.com/prometheus/alertmanager/pull/2200
dispatcherMetrics = dispatch.NewDispatcherMetrics(prometheus.NewRegistry())
)

func init() {
Expand Down Expand Up @@ -158,6 +154,7 @@ func New(cfg *Config, reg *prometheus.Registry) (*Alertmanager, error) {
Silences: am.silences,
StatusFunc: am.marker.Status,
Peer: cfg.Peer,
Registry: am.registry,
Logger: log.With(am.logger, "component", "api"),
GroupFunc: func(f1 func(*dispatch.Route) bool, f2 func(*types.Alert, time.Time) bool) (dispatch.AlertGroups, map[model.Fingerprint][]string) {
return am.dispatcher.Groups(f1, f2)
Expand All @@ -172,6 +169,7 @@ func New(cfg *Config, reg *prometheus.Registry) (*Alertmanager, error) {
ui.Register(router, webReload, log.With(am.logger, "component", "ui"))
am.mux = am.api.Register(router, am.cfg.ExternalURL.Path)

am.dispatcherMetrics = dispatch.NewDispatcherMetrics(am.registry)
return am, nil
}

Expand Down Expand Up @@ -240,7 +238,7 @@ func (am *Alertmanager) ApplyConfig(userID string, conf *config.Config) error {
am.marker,
timeoutFunc,
log.With(am.logger, "component", "dispatcher"),
dispatcherMetrics,
am.dispatcherMetrics,
)

go am.dispatcher.Run()
Expand Down
4 changes: 0 additions & 4 deletions pkg/alertmanager/multitenant_test.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
// +build !race

package alertmanager

import (
Expand Down Expand Up @@ -57,8 +55,6 @@ func (m *mockAlertStore) DeleteAlertConfig(ctx context.Context, user string) err
return fmt.Errorf("not implemented")
}

// TestLoadAllConfigs ensures the multitenant alertmanager can properly load configs from a local backend store.
// It is excluded from the race detector due to a vendored race issue https://github.com/prometheus/alertmanager/issues/2182
func TestLoadAllConfigs(t *testing.T) {
mockStore := &mockAlertStore{
configs: map[string]alerts.AlertConfigDesc{
Expand Down

0 comments on commit a87c25f

Please sign in to comment.