From e99a98eaf794466b0e9707745f3f8ffe404bfc78 Mon Sep 17 00:00:00 2001 From: Pablo Carranza Date: Wed, 22 Aug 2018 22:41:15 +0000 Subject: [PATCH] Don't fail when webhooks registration fails, and lots of metrics --- README.md | 5 +++-- config/config.go | 1 - main.go | 8 +++----- metrics/metrics.go | 24 ++++++++++++++++++++++ metrics/metrics_test.go | 12 +++++++++++ server/git.go | 6 +++--- server/server.go | 45 ++++++++++++++++++++++++++--------------- server/server_test.go | 5 ++--- 8 files changed, 76 insertions(+), 30 deletions(-) diff --git a/README.md b/README.md index b9d103b..aa4b322 100644 --- a/README.md +++ b/README.md @@ -52,8 +52,6 @@ directed to it. address in which to listen for webhooks (default ":9092") - **-repositories.path** *string* local path in which to store cloned repositories (default ".") -- **-skip.webhooks.registration** *bool* - don't register webhooks - **-sshkey** *string* ssh key to use to identify to remotes @@ -74,8 +72,11 @@ directed to it. | name | type | help | |---|---|---| +| github_webhooks_up | gauge | whether the service is ready to receive requests or not | +| github_webhooks_repo_up | gauge | whether a repo is succeeding or failing to read or write | | github_webhooks_git_latency_seconds | summary | latency percentiles of git fetch and push operations | | github_webhooks_hooks_received_total | counter | total count of hooks received | +| github_webhooks_hooks_retried_total | counter | total number of hooks that failed and were retried | | github_webhooks_hooks_updated_total | counter | total number of repos succefully updated | | github_webhooks_hooks_failed_total | counter | total number of repos that failed to update for some reason | | github_webhooks_boot_time_seconds | gauge | unix timestamp indicating when the process was started | diff --git a/config/config.go b/config/config.go index ce949b6..1c47e81 100644 --- a/config/config.go +++ b/config/config.go @@ -41,7 +41,6 @@ type Arguments struct { WebhooksTarget string RepositoriesPath string - SkipRegistration bool SSHKey string TimeoutSeconds uint64 diff --git a/main.go b/main.go index 59eb853..bf40bf1 100644 --- a/main.go +++ b/main.go @@ -53,10 +53,9 @@ func main() { } s := server.New(client, server.WebHooksServerOptions{ - GitTimeoutSeconds: args.TimeoutSeconds, - RepositoriesPath: args.RepositoriesPath, - SSHPrivateKey: args.SSHKey, - SkipWebhooksRegistration: args.SkipRegistration, + GitTimeoutSeconds: args.TimeoutSeconds, + RepositoriesPath: args.RepositoriesPath, + SSHPrivateKey: args.SSHKey, }) signalCh := make(chan os.Signal, 1) @@ -120,7 +119,6 @@ func parseArgs() config.Arguments { flag.StringVar(&args.WebhooksTarget, "webhooks.target", "github", "Used to define different kinds of webhooks clients, GitHub by default") flag.StringVar(&args.RepositoriesPath, "repositories.path", ".", "local path in which to store cloned repositories") - flag.BoolVar(&args.SkipRegistration, "skip.webhooks.registration", false, "don't register webhooks") flag.StringVar(&args.SSHKey, "sshkey", os.Getenv("SSH_KEY"), "ssh key to use to identify to remotes") flag.Uint64Var(&args.TimeoutSeconds, "git.timeout.seconds", 60, "git operations timeout in seconds") diff --git a/metrics/metrics.go b/metrics/metrics.go index 3a8b8b3..25c1454 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -12,12 +12,30 @@ var subsystem = "webhooks" // Prometheus metrics var ( + ServerIsUp = prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "up", + Help: "whether the service is ready to receive requests or not", + }) + RepoIsUp = prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "repo_up", + Help: "whether a repo is succeeding or failing to read or write", + }, []string{"repo"}) HooksReceivedTotal = prometheus.NewCounter(prometheus.CounterOpts{ Namespace: namespace, Subsystem: subsystem, Name: "hooks_received_total", Help: "total number of hooks received", }) + HooksRetriedTotal = prometheus.NewCounterVec(prometheus.CounterOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "hooks_retried_total", + Help: "total number of hooks that failed and were retried", + }, []string{"repo"}) HooksAcceptedTotal = prometheus.NewCounterVec(prometheus.CounterOpts{ Namespace: namespace, Subsystem: subsystem, @@ -59,6 +77,8 @@ var ( func init() { bootTime.Set(float64(time.Now().Unix())) + ServerIsUp.Set(0) + prometheus.MustRegister(bootTime) prometheus.MustRegister(LastSuccessfulConfigApply) prometheus.MustRegister(HooksReceivedTotal) @@ -66,6 +86,10 @@ func init() { prometheus.MustRegister(HooksUpdatedTotal) prometheus.MustRegister(HooksFailedTotal) prometheus.MustRegister(GitLatencySecondsTotal) + prometheus.MustRegister(RepoIsUp) + prometheus.MustRegister(ServerIsUp) + prometheus.MustRegister(HooksRetriedTotal) http.Handle("/metrics", prometheus.Handler()) + } diff --git a/metrics/metrics_test.go b/metrics/metrics_test.go index 5a3e694..54eeefa 100644 --- a/metrics/metrics_test.go +++ b/metrics/metrics_test.go @@ -11,6 +11,14 @@ func TestMetricsAreRegistered(t *testing.T) { name string collector prometheus.Collector }{ + { + "server is up", + metrics.ServerIsUp, + }, + { + "repo is up", + metrics.RepoIsUp, + }, { "latency seconds", metrics.GitLatencySecondsTotal, @@ -19,6 +27,10 @@ func TestMetricsAreRegistered(t *testing.T) { "hooks accepted", metrics.HooksAcceptedTotal, }, + { + "hook retried", + metrics.HooksRetriedTotal, + }, { "hooks failed", metrics.HooksFailedTotal, diff --git a/server/git.go b/server/git.go index 3980c5d..d41bd9b 100644 --- a/server/git.go +++ b/server/git.go @@ -10,14 +10,13 @@ import ( "github.com/jpillora/backoff" "github.com/sirupsen/logrus" + "gitlab.com/yakshaving.art/git-pull-mirror/metrics" "gitlab.com/yakshaving.art/git-pull-mirror/url" - "golang.org/x/crypto/ssh" - gitssh "gopkg.in/src-d/go-git.v4/plumbing/transport/ssh" - git "gopkg.in/src-d/go-git.v4" "gopkg.in/src-d/go-git.v4/config" "gopkg.in/src-d/go-git.v4/plumbing/transport" + gitssh "gopkg.in/src-d/go-git.v4/plumbing/transport/ssh" ) // Remotes names @@ -231,6 +230,7 @@ func (r Repository) Push() error { if err != nil && b.Attempt() < 3 { logrus.Warnf("failed to push to remote repo %s: %s... retrying", r.target, err) + metrics.HooksRetriedTotal.WithLabelValues(r.target.String()).Inc() time.Sleep(b.Duration()) continue } diff --git a/server/server.go b/server/server.go index a403924..11caec0 100644 --- a/server/server.go +++ b/server/server.go @@ -24,15 +24,15 @@ type WebHooksServer struct { config config.Config repositories map[string]Repository running bool + ready bool callbackPath string } // WebHooksServerOptions holds server configuration options type WebHooksServerOptions struct { - GitTimeoutSeconds uint64 - RepositoriesPath string - SSHPrivateKey string - SkipWebhooksRegistration bool + GitTimeoutSeconds uint64 + RepositoriesPath string + SSHPrivateKey string } // New returns a new unconfigured webhooks server @@ -49,12 +49,6 @@ func New(client webhooks.Client, opts WebHooksServerOptions) *WebHooksServer { func (ws *WebHooksServer) Configure(c config.Config) error { logrus.Debug("loading configuration") - client := ws.WebHooksClient - callback, err := url.ParseRequestURI(client.GetCallbackURL()) - if err != nil { - return fmt.Errorf("could not parse callback url %s: %s", client.GetCallbackURL(), err) - } - g := newGitClient(ws.opts) repositories := make(map[string]Repository, len(c.Repositories)) @@ -69,19 +63,19 @@ func (ws *WebHooksServer) Configure(c config.Config) error { repo, err := g.CloneOrOpen(r.OriginURL, r.TargetURL) if err != nil { errors <- fmt.Errorf("failed to clone or open %s: %s", r.OriginURL, err) + metrics.RepoIsUp.WithLabelValues(r.OriginURL.ToPath()).Set(0) return } if err = repo.Fetch(); err != nil { errors <- fmt.Errorf("failed to fetch %s: %s", r.OriginURL, err) + metrics.RepoIsUp.WithLabelValues(r.OriginURL.ToPath()).Set(0) return } - if !ws.opts.SkipWebhooksRegistration { - if err = client.RegisterWebhook(r.OriginURL); err != nil { - errors <- fmt.Errorf("failed to register webhooks for %s: %s", r.OriginURL, err) - return - } + if err = ws.WebHooksClient.RegisterWebhook(r.OriginURL); err != nil { + // We're skipping these errors on purporse to allow the server to boot up even if webhooks fail + logrus.Warnf("failed to register webhooks for %s: %s", r.OriginURL, err) } repositories[r.OriginURL.ToKey()] = repo @@ -104,9 +98,10 @@ func (ws *WebHooksServer) Configure(c config.Config) error { ws.lock.Lock() defer ws.lock.Unlock() - ws.callbackPath = callback.Path ws.config = c ws.repositories = repositories + ws.ready = true + metrics.ServerIsUp.Set(1) metrics.LastSuccessfulConfigApply.Set(float64(time.Now().Unix())) @@ -121,6 +116,12 @@ func (ws *WebHooksServer) Run(address string, c config.Config, ready chan interf logrus.Warnf("failed to configure server propertly: %s", err) } + callback, err := url.ParseRequestURI(ws.WebHooksClient.GetCallbackURL()) + if err != nil { + logrus.Fatalf("could not parse callback url %s: %s", ws.WebHooksClient.GetCallbackURL(), err) + } + ws.callbackPath = callback.Path + http.HandleFunc(ws.callbackPath, ws.WebHookHandler) logrus.Infof("starting listener on %s", address) ws.running = true @@ -147,6 +148,9 @@ func (ws *WebHooksServer) WebHookHandler(w http.ResponseWriter, r *http.Request) http.Error(w, "server is shutting down", http.StatusServiceUnavailable) return } + if !ws.ready { + http.Error(w, "Server is not ready to receive requests", http.StatusServiceUnavailable) + } if r.Method != "POST" { http.Error(w, fmt.Sprintf("only POST is allowed"), http.StatusBadRequest) @@ -201,6 +205,11 @@ func (ws *WebHooksServer) WebHookHandler(w http.ResponseWriter, r *http.Request) // UpdateAll triggers an update for all the repositories func (ws *WebHooksServer) UpdateAll() { + if !ws.ready { + logrus.Warnf("Can't update all repos when the service is not ready") + return + } + for _, repo := range ws.repositories { ws.wg.Add(1) go ws.updateRepository("USR2", repo) @@ -214,19 +223,23 @@ func (ws *WebHooksServer) updateRepository(requestID string, repo Repository) { if err := repo.Fetch(); err != nil { logrus.Errorf("failed to fetch repo %s for request %s: %s", repo.origin, requestID, err) metrics.HooksFailedTotal.WithLabelValues(repo.origin.ToPath()).Inc() + metrics.RepoIsUp.WithLabelValues(repo.origin.ToPath()).Set(0) return } metrics.GitLatencySecondsTotal.WithLabelValues("fetch", repo.origin.ToPath()).Observe(((time.Now().Sub(startFetch)).Seconds())) metrics.HooksUpdatedTotal.WithLabelValues(repo.origin.ToPath()).Inc() + metrics.RepoIsUp.WithLabelValues(repo.origin.ToPath()).Set(1) startPush := time.Now() if err := repo.Push(); err != nil { logrus.Errorf("failed to push repo %s to %s for request %s: %s", repo.origin, repo.target, requestID, err) metrics.HooksFailedTotal.WithLabelValues(repo.target.ToPath()).Inc() + metrics.RepoIsUp.WithLabelValues(repo.target.ToPath()).Set(0) return } metrics.GitLatencySecondsTotal.WithLabelValues("push", repo.target.ToPath()).Observe(((time.Now().Sub(startPush)).Seconds())) metrics.HooksUpdatedTotal.WithLabelValues(repo.target.ToPath()).Inc() + metrics.RepoIsUp.WithLabelValues(repo.target.ToPath()).Set(1) logrus.Debugf("repository %s pushed to %s for request %s", repo.origin, repo.target, requestID) } diff --git a/server/server_test.go b/server/server_test.go index a034dcc..4929fc6 100644 --- a/server/server_test.go +++ b/server/server_test.go @@ -28,9 +28,8 @@ func TestBuildingAServerAndConfigureWithEmptyConfigWorks(t *testing.T) { User: "myuser", }) s := New(client, WebHooksServerOptions{ - GitTimeoutSeconds: 10, - RepositoriesPath: tmpDir, - SkipWebhooksRegistration: true, + GitTimeoutSeconds: 10, + RepositoriesPath: tmpDir, }) originURL, err := url.Parse("https://github.com/yakshaving-art/git-pull-mirror.git") must(t, "could not parse origin url", err)