Skip to content

Commit

Permalink
Capture and expose notification delivery errors
Browse files Browse the repository at this point in the history
This PR makes it possible to store the last error for each integration of a receiver when notification delivery fails. These errors are exposed via the `/api/v2/receivers` endpoint, together with the timestamp and duration of the last delivery attempt.
  • Loading branch information
gotjosh authored and santihernandezc committed Jan 19, 2023
1 parent f9c1c90 commit 42cc05b
Show file tree
Hide file tree
Showing 13 changed files with 473 additions and 69 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,7 @@
!/.travis.yml
!/.promu.yml
!/api/v2/openapi.yaml

# Editor
.vscode
.DS_Store
5 changes: 3 additions & 2 deletions api/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ import (
"github.com/prometheus/alertmanager/cluster"
"github.com/prometheus/alertmanager/config"
"github.com/prometheus/alertmanager/dispatch"
"github.com/prometheus/alertmanager/notify"
"github.com/prometheus/alertmanager/provider"
"github.com/prometheus/alertmanager/silence"
"github.com/prometheus/alertmanager/types"
Expand Down Expand Up @@ -195,9 +196,9 @@ func (api *API) Register(r *route.Router, routePrefix string) *http.ServeMux {

// Update config and resolve timeout of each API. APIv2 also needs
// setAlertStatus and receivers to be updated.
func (api *API) Update(cfg *config.Config, setAlertStatus func(model.LabelSet)) {
func (api *API) Update(cfg *config.Config, receivers []*notify.Receiver, setAlertStatus func(model.LabelSet)) {
api.v1.Update(cfg)
api.v2.Update(cfg, setAlertStatus)
api.v2.Update(cfg, setAlertStatus, receivers)
}

func (api *API) limitHandler(h http.Handler) http.Handler {
Expand Down
47 changes: 40 additions & 7 deletions api/v2/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ import (
"github.com/prometheus/common/version"
"github.com/rs/cors"

"github.com/prometheus/alertmanager/api/metrics"
open_api_models "github.com/prometheus/alertmanager/api/v2/models"
"github.com/prometheus/alertmanager/api/v2/restapi"
"github.com/prometheus/alertmanager/api/v2/restapi/operations"
Expand All @@ -40,9 +39,12 @@ import (
general_ops "github.com/prometheus/alertmanager/api/v2/restapi/operations/general"
receiver_ops "github.com/prometheus/alertmanager/api/v2/restapi/operations/receiver"
silence_ops "github.com/prometheus/alertmanager/api/v2/restapi/operations/silence"

"github.com/prometheus/alertmanager/api/metrics"
"github.com/prometheus/alertmanager/cluster"
"github.com/prometheus/alertmanager/config"
"github.com/prometheus/alertmanager/dispatch"
"github.com/prometheus/alertmanager/notify"
"github.com/prometheus/alertmanager/pkg/labels"
"github.com/prometheus/alertmanager/provider"
"github.com/prometheus/alertmanager/silence"
Expand Down Expand Up @@ -70,7 +72,8 @@ type API struct {
logger log.Logger
m *metrics.Alerts

Handler http.Handler
Handler http.Handler
receivers []*notify.Receiver
}

type (
Expand Down Expand Up @@ -137,13 +140,14 @@ func (api *API) requestLogger(req *http.Request) log.Logger {
}

// Update sets the API struct members that may change between reloads of alertmanager.
func (api *API) Update(cfg *config.Config, setAlertStatus setAlertStatusFn) {
func (api *API) Update(cfg *config.Config, setAlertStatus setAlertStatusFn, receivers []*notify.Receiver) {
api.mtx.Lock()
defer api.mtx.Unlock()

api.alertmanagerConfig = cfg
api.route = dispatch.NewRoute(cfg.Route, nil)
api.setAlertStatus = setAlertStatus
api.receivers = receivers
}

func (api *API) getStatusHandler(params general_ops.GetStatusParams) middleware.Responder {
Expand Down Expand Up @@ -204,11 +208,40 @@ func (api *API) getStatusHandler(params general_ops.GetStatusParams) middleware.

func (api *API) getReceiversHandler(params receiver_ops.GetReceiversParams) middleware.Responder {
api.mtx.RLock()
defer api.mtx.RUnlock()
configReceivers := api.receivers
api.mtx.RUnlock()

receivers := make([]*open_api_models.Receiver, 0, len(configReceivers))
for _, r := range configReceivers {
integrations := make([]*open_api_models.Integration, 0, len(r.Integrations()))

for _, integration := range r.Integrations() {
notify, duration, err := integration.GetReport()
iname := integration.String()
sendResolved := integration.SendResolved()
integrations = append(integrations, &open_api_models.Integration{
Name: &iname,
SendResolved: &sendResolved,
LastNotifyAttempt: strfmt.DateTime(notify.UTC()),
LastNotifyAttemptDuration: duration.String(),
LastNotifyAttemptError: func() string {
if err != nil {
return err.Error()
}
return ""
}(),
})
}

rName := r.Name()
active := r.Active()
model := &open_api_models.Receiver{
Name: &rName,
Active: &active,
Integrations: integrations,
}

receivers := make([]*open_api_models.Receiver, 0, len(api.alertmanagerConfig.Receivers))
for _, r := range api.alertmanagerConfig.Receivers {
receivers = append(receivers, &open_api_models.Receiver{Name: &r.Name})
receivers = append(receivers, model)
}

return receiver_ops.NewGetReceiversOK().WithPayload(receivers)
Expand Down
6 changes: 3 additions & 3 deletions api/v2/api_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ import (
"bytes"
"encoding/json"
"fmt"
"io/ioutil"
"io"
"net/http"
"net/http/httptest"
"strconv"
Expand Down Expand Up @@ -205,7 +205,7 @@ func TestDeleteSilenceHandler(t *testing.T) {
HTTPRequest: r,
})
responder.WriteResponse(w, p)
body, _ := ioutil.ReadAll(w.Result().Body)
body, _ := io.ReadAll(w.Result().Body)

require.Equal(t, tc.expectedCode, w.Code, fmt.Sprintf("test case: %d, response: %s", i, string(body)))
}
Expand Down Expand Up @@ -305,7 +305,7 @@ func TestPostSilencesHandler(t *testing.T) {
Silence: &sil,
})
responder.WriteResponse(w, p)
body, _ := ioutil.ReadAll(w.Result().Body)
body, _ := io.ReadAll(w.Result().Body)

require.Equal(t, tc.expectedCode, w.Code, fmt.Sprintf("test case: %d, response: %s", i, string(body)))
}
Expand Down
122 changes: 122 additions & 0 deletions api/v2/models/integration.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

52 changes: 52 additions & 0 deletions api/v2/models/receiver.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

28 changes: 28 additions & 0 deletions api/v2/openapi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -506,8 +506,36 @@ definitions:
properties:
name:
type: string
active:
type: boolean
integrations:
type: array
items:
$ref: '#/definitions/integration'
required:
- name
- active
- integrations
integration:
type: object
properties:
name:
type: string
sendResolved:
type: boolean
lastNotifyAttempt:
description: A timestamp indicating the last attempt to deliver a notification regardless of the outcome.
type: string
format: date-time
lastNotifyAttemptDuration:
description: Duration of the last attempt to deliver a notification in humanized format (`1s` or `15ms`, etc).
type: string
lastNotifyAttemptError:
description: Error string for the last attempt to deliver a notification. Empty if the last attempt was successful.
type: string
required:
- name
- sendResolved
labelSet:
type: object
additionalProperties:
Expand Down
Loading

0 comments on commit 42cc05b

Please sign in to comment.