Skip to content

Commit

Permalink
Merge branch 'pc/dont-fail-on-webhook-failure-and-metrics' into 'master'
Browse files Browse the repository at this point in the history
Don't fail when webhooks registration fails, and lots of metrics

See merge request yakshaving.art/git-pull-mirror!38
  • Loading branch information
Ilya Frolov committed Aug 22, 2018
2 parents 160d380 + e99a98e commit b5be55f
Show file tree
Hide file tree
Showing 8 changed files with 76 additions and 30 deletions.
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,6 @@ directed to it.
address in which to listen for webhooks (default ":9092")
- **-repositories.path** *string*
local path in which to store cloned repositories (default ".")
- **-skip.webhooks.registration** *bool*
don't register webhooks
- **-sshkey** *string*
ssh key to use to identify to remotes

Expand All @@ -74,8 +72,11 @@ directed to it.

| name | type | help |
|---|---|---|
| github_webhooks_up | gauge | whether the service is ready to receive requests or not |
| github_webhooks_repo_up | gauge | whether a repo is succeeding or failing to read or write |
| github_webhooks_git_latency_seconds | summary | latency percentiles of git fetch and push operations |
| github_webhooks_hooks_received_total | counter | total count of hooks received |
| github_webhooks_hooks_retried_total | counter | total number of hooks that failed and were retried |
| github_webhooks_hooks_updated_total | counter | total number of repos successfully updated |
| github_webhooks_hooks_failed_total | counter | total number of repos that failed to update for some reason |
| github_webhooks_boot_time_seconds | gauge | unix timestamp indicating when the process was started |
Expand Down
1 change: 0 additions & 1 deletion config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@ type Arguments struct {

WebhooksTarget string
RepositoriesPath string
SkipRegistration bool
SSHKey string
TimeoutSeconds uint64

Expand Down
8 changes: 3 additions & 5 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,10 +53,9 @@ func main() {
}

s := server.New(client, server.WebHooksServerOptions{
GitTimeoutSeconds: args.TimeoutSeconds,
RepositoriesPath: args.RepositoriesPath,
SSHPrivateKey: args.SSHKey,
SkipWebhooksRegistration: args.SkipRegistration,
GitTimeoutSeconds: args.TimeoutSeconds,
RepositoriesPath: args.RepositoriesPath,
SSHPrivateKey: args.SSHKey,
})

signalCh := make(chan os.Signal, 1)
Expand Down Expand Up @@ -120,7 +119,6 @@ func parseArgs() config.Arguments {

flag.StringVar(&args.WebhooksTarget, "webhooks.target", "github", "Used to define different kinds of webhooks clients, GitHub by default")
flag.StringVar(&args.RepositoriesPath, "repositories.path", ".", "local path in which to store cloned repositories")
flag.BoolVar(&args.SkipRegistration, "skip.webhooks.registration", false, "don't register webhooks")
flag.StringVar(&args.SSHKey, "sshkey", os.Getenv("SSH_KEY"), "ssh key to use to identify to remotes")
flag.Uint64Var(&args.TimeoutSeconds, "git.timeout.seconds", 60, "git operations timeout in seconds")

Expand Down
24 changes: 24 additions & 0 deletions metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,30 @@ var subsystem = "webhooks"

// Prometheus metrics
var (
ServerIsUp = prometheus.NewGauge(prometheus.GaugeOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "up",
Help: "whether the service is ready to receive requests or not",
})
RepoIsUp = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "repo_up",
Help: "whether a repo is succeeding or failing to read or write",
}, []string{"repo"})
HooksReceivedTotal = prometheus.NewCounter(prometheus.CounterOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "hooks_received_total",
Help: "total number of hooks received",
})
HooksRetriedTotal = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "hooks_retried_total",
Help: "total number of hooks that failed and were retried",
}, []string{"repo"})
HooksAcceptedTotal = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: namespace,
Subsystem: subsystem,
Expand Down Expand Up @@ -59,13 +77,19 @@ var (

// init seeds the boot-time and readiness gauges, registers every collector
// declared in this package with the default Prometheus registry, and exposes
// them over HTTP on the /metrics path of the default mux.
func init() {
	// The readiness gauge starts at 0 and the boot time is stamped once, at
	// package load.
	bootTime.Set(float64(time.Now().Unix()))
	ServerIsUp.Set(0)

	// A single variadic call registers the collectors in the listed order;
	// MustRegister panics if any of them cannot be registered.
	prometheus.MustRegister(
		bootTime,
		LastSuccessfulConfigApply,
		HooksReceivedTotal,
		HooksAcceptedTotal,
		HooksUpdatedTotal,
		HooksFailedTotal,
		GitLatencySecondsTotal,
		RepoIsUp,
		ServerIsUp,
		HooksRetriedTotal,
	)

	http.Handle("/metrics", prometheus.Handler())
}
12 changes: 12 additions & 0 deletions metrics/metrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,14 @@ func TestMetricsAreRegistered(t *testing.T) {
name string
collector prometheus.Collector
}{
{
"server is up",
metrics.ServerIsUp,
},
{
"repo is up",
metrics.RepoIsUp,
},
{
"latency seconds",
metrics.GitLatencySecondsTotal,
Expand All @@ -19,6 +27,10 @@ func TestMetricsAreRegistered(t *testing.T) {
"hooks accepted",
metrics.HooksAcceptedTotal,
},
{
"hook retried",
metrics.HooksRetriedTotal,
},
{
"hooks failed",
metrics.HooksFailedTotal,
Expand Down
6 changes: 3 additions & 3 deletions server/git.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,13 @@ import (

"github.com/jpillora/backoff"
"github.com/sirupsen/logrus"
"gitlab.com/yakshaving.art/git-pull-mirror/metrics"
"gitlab.com/yakshaving.art/git-pull-mirror/url"

"golang.org/x/crypto/ssh"
gitssh "gopkg.in/src-d/go-git.v4/plumbing/transport/ssh"

git "gopkg.in/src-d/go-git.v4"
"gopkg.in/src-d/go-git.v4/config"
"gopkg.in/src-d/go-git.v4/plumbing/transport"
gitssh "gopkg.in/src-d/go-git.v4/plumbing/transport/ssh"
)

// Remotes names
Expand Down Expand Up @@ -231,6 +230,7 @@ func (r Repository) Push() error {

if err != nil && b.Attempt() < 3 {
logrus.Warnf("failed to push to remote repo %s: %s... retrying", r.target, err)
metrics.HooksRetriedTotal.WithLabelValues(r.target.String()).Inc()
time.Sleep(b.Duration())
continue
}
Expand Down
45 changes: 29 additions & 16 deletions server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,15 @@ type WebHooksServer struct {
config config.Config
repositories map[string]Repository
running bool
ready bool
callbackPath string
}

// WebHooksServerOptions holds server configuration options
type WebHooksServerOptions struct {
GitTimeoutSeconds uint64
RepositoriesPath string
SSHPrivateKey string
SkipWebhooksRegistration bool
GitTimeoutSeconds uint64
RepositoriesPath string
SSHPrivateKey string
}

// New returns a new unconfigured webhooks server
Expand All @@ -49,12 +49,6 @@ func New(client webhooks.Client, opts WebHooksServerOptions) *WebHooksServer {
func (ws *WebHooksServer) Configure(c config.Config) error {
logrus.Debug("loading configuration")

client := ws.WebHooksClient
callback, err := url.ParseRequestURI(client.GetCallbackURL())
if err != nil {
return fmt.Errorf("could not parse callback url %s: %s", client.GetCallbackURL(), err)
}

g := newGitClient(ws.opts)

repositories := make(map[string]Repository, len(c.Repositories))
Expand All @@ -69,19 +63,19 @@ func (ws *WebHooksServer) Configure(c config.Config) error {
repo, err := g.CloneOrOpen(r.OriginURL, r.TargetURL)
if err != nil {
errors <- fmt.Errorf("failed to clone or open %s: %s", r.OriginURL, err)
metrics.RepoIsUp.WithLabelValues(r.OriginURL.ToPath()).Set(0)
return
}

if err = repo.Fetch(); err != nil {
errors <- fmt.Errorf("failed to fetch %s: %s", r.OriginURL, err)
metrics.RepoIsUp.WithLabelValues(r.OriginURL.ToPath()).Set(0)
return
}

if !ws.opts.SkipWebhooksRegistration {
if err = client.RegisterWebhook(r.OriginURL); err != nil {
errors <- fmt.Errorf("failed to register webhooks for %s: %s", r.OriginURL, err)
return
}
if err = ws.WebHooksClient.RegisterWebhook(r.OriginURL); err != nil {
// We're skipping these errors on purpose to allow the server to boot up even if webhooks fail
logrus.Warnf("failed to register webhooks for %s: %s", r.OriginURL, err)
}

repositories[r.OriginURL.ToKey()] = repo
Expand All @@ -104,9 +98,10 @@ func (ws *WebHooksServer) Configure(c config.Config) error {
ws.lock.Lock()
defer ws.lock.Unlock()

ws.callbackPath = callback.Path
ws.config = c
ws.repositories = repositories
ws.ready = true
metrics.ServerIsUp.Set(1)

metrics.LastSuccessfulConfigApply.Set(float64(time.Now().Unix()))

Expand All @@ -121,6 +116,12 @@ func (ws *WebHooksServer) Run(address string, c config.Config, ready chan interf
logrus.Warnf("failed to configure server propertly: %s", err)
}

callback, err := url.ParseRequestURI(ws.WebHooksClient.GetCallbackURL())
if err != nil {
logrus.Fatalf("could not parse callback url %s: %s", ws.WebHooksClient.GetCallbackURL(), err)
}
ws.callbackPath = callback.Path

http.HandleFunc(ws.callbackPath, ws.WebHookHandler)
logrus.Infof("starting listener on %s", address)
ws.running = true
Expand All @@ -147,6 +148,9 @@ func (ws *WebHooksServer) WebHookHandler(w http.ResponseWriter, r *http.Request)
http.Error(w, "server is shutting down", http.StatusServiceUnavailable)
return
}
// Reject hooks while the server is not marked ready. The explicit return is
// required: http.Error only writes the response and does not stop the
// handler, so without it the request would keep being processed after the
// 503 has been sent.
if !ws.ready {
	http.Error(w, "Server is not ready to receive requests", http.StatusServiceUnavailable)
	return
}

if r.Method != "POST" {
http.Error(w, fmt.Sprintf("only POST is allowed"), http.StatusBadRequest)
Expand Down Expand Up @@ -201,6 +205,11 @@ func (ws *WebHooksServer) WebHookHandler(w http.ResponseWriter, r *http.Request)

// UpdateAll triggers an update for all the repositories
func (ws *WebHooksServer) UpdateAll() {
if !ws.ready {
logrus.Warnf("Can't update all repos when the service is not ready")
return
}

for _, repo := range ws.repositories {
ws.wg.Add(1)
go ws.updateRepository("USR2", repo)
Expand All @@ -214,19 +223,23 @@ func (ws *WebHooksServer) updateRepository(requestID string, repo Repository) {
if err := repo.Fetch(); err != nil {
logrus.Errorf("failed to fetch repo %s for request %s: %s", repo.origin, requestID, err)
metrics.HooksFailedTotal.WithLabelValues(repo.origin.ToPath()).Inc()
metrics.RepoIsUp.WithLabelValues(repo.origin.ToPath()).Set(0)
return
}
metrics.GitLatencySecondsTotal.WithLabelValues("fetch", repo.origin.ToPath()).Observe(((time.Now().Sub(startFetch)).Seconds()))
metrics.HooksUpdatedTotal.WithLabelValues(repo.origin.ToPath()).Inc()
metrics.RepoIsUp.WithLabelValues(repo.origin.ToPath()).Set(1)

startPush := time.Now()
if err := repo.Push(); err != nil {
logrus.Errorf("failed to push repo %s to %s for request %s: %s", repo.origin, repo.target, requestID, err)
metrics.HooksFailedTotal.WithLabelValues(repo.target.ToPath()).Inc()
metrics.RepoIsUp.WithLabelValues(repo.target.ToPath()).Set(0)
return
}
metrics.GitLatencySecondsTotal.WithLabelValues("push", repo.target.ToPath()).Observe(((time.Now().Sub(startPush)).Seconds()))
metrics.HooksUpdatedTotal.WithLabelValues(repo.target.ToPath()).Inc()
metrics.RepoIsUp.WithLabelValues(repo.target.ToPath()).Set(1)

logrus.Debugf("repository %s pushed to %s for request %s", repo.origin, repo.target, requestID)
}
5 changes: 2 additions & 3 deletions server/server_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,8 @@ func TestBuildingAServerAndConfigureWithEmptyConfigWorks(t *testing.T) {
User: "myuser",
})
s := New(client, WebHooksServerOptions{
GitTimeoutSeconds: 10,
RepositoriesPath: tmpDir,
SkipWebhooksRegistration: true,
GitTimeoutSeconds: 10,
RepositoriesPath: tmpDir,
})
originURL, err := url.Parse("https://github.com/yakshaving-art/git-pull-mirror.git")
must(t, "could not parse origin url", err)
Expand Down

0 comments on commit b5be55f

Please sign in to comment.