Skip to content

Commit

Permalink
metrics: fix review findings
Browse files Browse the repository at this point in the history
  • Loading branch information
maigl committed Jan 26, 2023
1 parent 082cbd5 commit c7330c0
Show file tree
Hide file tree
Showing 9 changed files with 111 additions and 82 deletions.
31 changes: 14 additions & 17 deletions apiserver/controllers/controllers.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ package controllers

import (
"encoding/json"
"fmt"
"io"
"log"
"net/http"
Expand All @@ -36,12 +35,7 @@ import (
"github.com/prometheus/client_golang/prometheus"
)

func NewAPIController(r *runner.Runner, auth *auth.Authenticator, hub *wsWriter.Hub) (*APIController, error) {
id, err := r.GetControllerID()
if err != nil {
return nil, errors.Wrap(err, "getting controller ID")
}

func NewAPIController(r *runner.Runner, auth *auth.Authenticator, hub *wsWriter.Hub, controllerInfo runnerParams.ControllerInfo) (*APIController, error) {
return &APIController{
r: r,
auth: auth,
Expand All @@ -50,17 +44,16 @@ func NewAPIController(r *runner.Runner, auth *auth.Authenticator, hub *wsWriter.
ReadBufferSize: 1024,
WriteBufferSize: 16384,
},
id: id.String(),
controllerInfo: controllerInfo,
}, nil
}

type APIController struct {
r *runner.Runner
auth *auth.Authenticator
hub *wsWriter.Hub
upgrader websocket.Upgrader
// holds this controller's id
id string
r *runner.Runner
auth *auth.Authenticator
hub *wsWriter.Hub
upgrader websocket.Upgrader
controllerInfo runnerParams.ControllerInfo
}

func handleError(w http.ResponseWriter, err error) {
Expand Down Expand Up @@ -110,7 +103,7 @@ func (a *APIController) GetControllerInfo() (hostname, controllerId string) {
return "", ""
}

return hostname, a.id
return hostname, a.controllerInfo.ControllerID.String()
}

// metric to count total webhooks received
Expand Down Expand Up @@ -225,14 +218,18 @@ func (a *APIController) NotFoundHandler(w http.ResponseWriter, r *http.Request)
func (a *APIController) MetricsTokenHandler(w http.ResponseWriter, r *http.Request) {
ctx := r.Context()

if !auth.IsAdmin(ctx) {
handleError(w, gErrors.ErrUnauthorized)
return
}

token, err := a.auth.GetJWTMetricsToken(ctx)
if err != nil {
handleError(w, err)
return
}
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(http.StatusOK)
fmt.Fprintf(w, `{"token": "%s"}`, token)
json.NewEncoder(w).Encode(runnerParams.JWTResponse{Token: token})
}

// LoginHandler returns a jwt token
Expand Down
59 changes: 45 additions & 14 deletions apiserver/controllers/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,47 +2,79 @@ package controllers

import (
"log"
"os"

"garm/auth"
"garm/runner"

"github.com/prometheus/client_golang/prometheus"
)

type GarmCollector struct {
healthMetric *prometheus.Desc
instanceMetric *prometheus.Desc
apiController *APIController
runner *runner.Runner
}

func NewGarmCollector(a *APIController) *GarmCollector {
func NewGarmCollector(r *runner.Runner) *GarmCollector {
return &GarmCollector{
apiController: a,
runner: r,
instanceMetric: prometheus.NewDesc(
"garm_runner_status",
"Status of the runner",
[]string{"name", "status", "runner_status", "pool", "pool_type", "hostname", "controller_id"}, nil,
)}
[]string{"name", "status", "runner_status", "pool_owner", "pool_type", "pool_id", "hostname", "controller_id"}, nil,
),
healthMetric: prometheus.NewDesc(
"garm_health",
"Health of the runner",
[]string{"hostname", "controller_id"}, nil,
),
}
}

// Describe sends the descriptor of every metric this collector exposes
// (the per-instance metric followed by the health metric) on ch.
func (c *GarmCollector) Describe(ch chan<- *prometheus.Desc) {
	descs := []*prometheus.Desc{c.instanceMetric, c.healthMetric}
	for _, d := range descs {
		ch <- d
	}
}

// Collect gathers the instance and health metrics and sends them on ch.
//
// The controller ID and the local hostname are attached as labels to every
// metric. If either of them cannot be determined, the failure is logged and
// nothing is emitted for this scrape, instead of publishing series that
// carry a zero-value controller ID or an empty hostname.
func (c *GarmCollector) Collect(ch chan<- prometheus.Metric) {
	controllerID, err := c.runner.GetControllerID(auth.GetAdminContext())
	if err != nil {
		log.Printf("cannot collect metrics, getting controller ID: %s", err)
		return
	}

	hostname, err := os.Hostname()
	if err != nil {
		log.Printf("cannot collect metrics, getting hostname: %s", err)
		return
	}

	// Render the ID once; both collectors label their samples with it.
	id := controllerID.String()
	c.CollectInstanceMetric(ch, hostname, id)
	c.CollectHealthMetric(ch, hostname, id)
}

// CollectHealthMetric sends one gauge sample for the health metric on ch.
// The sample always has the value 1 and is labelled with the given hostname
// and controller ID, in that order.
func (c *GarmCollector) CollectHealthMetric(ch chan<- prometheus.Metric, hostname string, controllerID string) {
	const healthy = 1

	sample := prometheus.MustNewConstMetric(c.healthMetric, prometheus.GaugeValue, healthy, hostname, controllerID)
	ch <- sample
}

// CollectInstanceMetric collects the metrics for the runner instances
// reflecting the statuses and the pool they belong to.
func (c *GarmCollector) CollectInstanceMetric(ch chan<- prometheus.Metric) {
func (c *GarmCollector) CollectInstanceMetric(ch chan<- prometheus.Metric, hostname string, controllerID string) {

ctx := auth.GetAdminContext()

instances, err := c.apiController.r.ListAllInstances(ctx)
instances, err := c.runner.ListAllInstances(ctx)
if err != nil {
log.Printf("cannot collect metrics, listing instances: %s", err)
return
}

pools, err := c.apiController.r.ListAllPools(ctx)
pools, err := c.runner.ListAllPools(ctx)
if err != nil {
log.Printf("listing pools: %s", err)
// continue anyway
Expand All @@ -58,23 +90,21 @@ func (c *GarmCollector) CollectInstanceMetric(ch chan<- prometheus.Metric) {
if pool.EnterpriseName != "" {
poolNames[pool.ID] = poolInfo{
Name: pool.EnterpriseName,
Type: "enterprise",
Type: string(pool.PoolType()),
}
} else if pool.OrgName != "" {
poolNames[pool.ID] = poolInfo{
Name: pool.OrgName,
Type: "organization",
Type: string(pool.PoolType()),
}
} else {
poolNames[pool.ID] = poolInfo{
Name: pool.RepoName,
Type: "repository",
Type: string(pool.PoolType()),
}
}
}

hostname, controllerID := c.apiController.GetControllerInfo()

for _, instance := range instances {

m, err := prometheus.NewConstMetric(
Expand All @@ -86,6 +116,7 @@ func (c *GarmCollector) CollectInstanceMetric(ch chan<- prometheus.Metric) {
string(instance.RunnerStatus),
poolNames[instance.PoolID].Name,
poolNames[instance.PoolID].Type,
instance.PoolID,
hostname,
controllerID,
)
Expand Down
13 changes: 12 additions & 1 deletion apiserver/routers/routers.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,17 +19,28 @@ import (
"net/http"

"github.com/gorilla/mux"
"github.com/prometheus/client_golang/prometheus/promhttp"

"garm/apiserver/controllers"
"garm/auth"
"garm/config"
"garm/util"
)

func NewAPIRouter(han *controllers.APIController, logWriter io.Writer, authMiddleware, initMiddleware, instanceMiddleware auth.Middleware) *mux.Router {
func NewAPIRouter(han *controllers.APIController, logWriter io.Writer, cfg *config.Config, authMiddleware, initMiddleware, instanceMiddleware, metricsMiddlerware auth.Middleware) *mux.Router {
router := mux.NewRouter()
logMiddleware := util.NewLoggingMiddleware(logWriter)
router.Use(logMiddleware)

if !cfg.Metrics.Disable {
metricsRouter := router.PathPrefix("/metrics").Subrouter()
if !cfg.Metrics.NoAuth {
metricsRouter.Use(metricsMiddlerware.Middleware)
}
metricsRouter.Handle("/", promhttp.Handler()).Methods("GET", "OPTIONS")
metricsRouter.Handle("", promhttp.Handler()).Methods("GET", "OPTIONS")
}

// Handles github webhooks
webhookRouter := router.PathPrefix("/webhooks").Subrouter()
webhookRouter.PathPrefix("/").Handler(http.HandlerFunc(han.CatchAll))
Expand Down
6 changes: 5 additions & 1 deletion auth/auth.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,11 @@ func (a *Authenticator) GetJWTToken(ctx context.Context) (string, error) {
// GetJWTMetricsToken returns a JWT token that can be used to read metrics.
// This token is not tied to a user, no user is stored in the db.
func (a *Authenticator) GetJWTMetricsToken(ctx context.Context) (string, error) {

if !IsAdmin(ctx) {
return "", runnerErrors.ErrUnauthorized
}

tokenID, err := util.GetRandomString(16)
if err != nil {
return "", errors.Wrap(err, "generating random string")
Expand All @@ -88,7 +93,6 @@ func (a *Authenticator) GetJWTMetricsToken(ctx context.Context) (string, error)
// TODO: make this configurable
Issuer: "garm",
},
UserID: "metrics",
TokenID: tokenID,
IsAdmin: false,
ReadMetrics: true,
Expand Down
5 changes: 3 additions & 2 deletions auth/context.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,9 @@ import (
type contextFlags string

const (
isAdminKey contextFlags = "is_admin"
fullNameKey contextFlags = "full_name"
isAdminKey contextFlags = "is_admin"
fullNameKey contextFlags = "full_name"
readMetricsKey contextFlags = "read_metrics"
// UserIDFlag is the User ID flag we set in the context
UserIDFlag contextFlags = "user_id"
isEnabledFlag contextFlags = "is_enabled"
Expand Down
8 changes: 6 additions & 2 deletions auth/metrics.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package auth

import (
"context"
"fmt"
"garm/config"
"net/http"
Expand All @@ -13,10 +14,10 @@ type MetricsMiddleware struct {
cfg config.JWTAuth
}

func NewMetricsMiddleware(cfg config.JWTAuth) *MetricsMiddleware {
func NewMetricsMiddleware(cfg config.JWTAuth) (*MetricsMiddleware, error) {
return &MetricsMiddleware{
cfg: cfg,
}
}, nil
}

func (m *MetricsMiddleware) Middleware(next http.Handler) http.Handler {
Expand Down Expand Up @@ -59,6 +60,9 @@ func (m *MetricsMiddleware) Middleware(next http.Handler) http.Handler {
return
}

ctx = context.WithValue(ctx, isAdminKey, false)
ctx = context.WithValue(ctx, readMetricsKey, true)

next.ServeHTTP(w, r.WithContext(ctx))
})
}
41 changes: 16 additions & 25 deletions cmd/garm/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@ import (
"github.com/gorilla/mux"
"github.com/pkg/errors"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
)

var (
Expand Down Expand Up @@ -112,14 +111,19 @@ func main() {
log.Fatalf("failed to create controller: %+v", err)
}

controllerInfo, err := db.ControllerInfo()
if err != nil {
log.Fatal(err)
}

// If there are many repos/pools, this may take a long time.
// TODO: start pool managers in the background and log errors.
if err := runner.Start(); err != nil {
log.Fatal(err)
}

authenticator := auth.NewAuthenticator(cfg.JWTAuth, db)
controller, err := controllers.NewAPIController(runner, authenticator, hub)
controller, err := controllers.NewAPIController(runner, authenticator, hub, controllerInfo)
if err != nil {
log.Fatalf("failed to create controller: %+v", err)
}
Expand All @@ -139,7 +143,16 @@ func main() {
log.Fatal(err)
}

router := routers.NewAPIRouter(controller, multiWriter, jwtMiddleware, initMiddleware, instanceMiddleware)
metricsMiddleware, err := auth.NewMetricsMiddleware(cfg.JWTAuth)
if err != nil {
log.Fatal(err)
}
// Register the garm collector with the default Prometheus registry. A
// failure is only logged, so startup continues without the collector.
err = prometheus.Register(controllers.NewGarmCollector(runner))
if err != nil {
	// The format string needs a verb for err; without one the error is
	// printed as "%!(EXTRA ...)" noise and go vet rejects the call.
	log.Printf("failed to register garm collector in prometheus: %+v", err)
}

router := routers.NewAPIRouter(controller, multiWriter, cfg, jwtMiddleware, initMiddleware, instanceMiddleware, metricsMiddleware)
corsMw := mux.CORSMethodMiddleware(router)
router.Use(corsMw)

Expand Down Expand Up @@ -170,28 +183,6 @@ func main() {
}
}()

if !cfg.APIServer.MetricsConfig.Disabled {
go func() {

metricsMiddleware := auth.NewMetricsMiddleware(cfg.JWTAuth)

r := mux.NewRouter()
r.Handle("/metrics", promhttp.Handler())
if !cfg.APIServer.MetricsConfig.NoAuth {
r.Use(metricsMiddleware.Middleware)
}

err := prometheus.Register(controllers.NewGarmCollector(controller))
if err != nil {
log.Printf("failed to register prometheus collector: %+v", err)
}

if err := http.ListenAndServe(cfg.APIServer.MetricsBindAddress(), r); err != nil {
log.Printf("metrics server failed: %+v", err)
}
}()
}

<-ctx.Done()
shutdownCtx, shutdownCancel := context.WithTimeout(context.Background(), 60*time.Second)
defer shutdownCancel()
Expand Down
Loading

0 comments on commit c7330c0

Please sign in to comment.