From 8442695efb5642cfa396205670a594975ef372bc Mon Sep 17 00:00:00 2001 From: penmanglewood Date: Tue, 28 Jun 2022 16:53:36 +0800 Subject: [PATCH] Parameterize pagespeed categories (#45) This creates an environment variable and CLI flag to set which pagespeed categories to fetch. By default, it fetches all five, preserving this package's default functionality. If categories are specified in a target as JSON, it will supersede the categories specified via the envvar or flag. --- README.md | 26 +++++--- collector/collector.go | 33 +++++----- collector/model.go | 63 +++++++++++++++--- collector/model_test.go | 138 +++++++++++++++++++++++++++++++-------- collector/scrape.go | 2 +- collector/scrape_test.go | 2 +- handler/probe.go | 6 +- handler/probe_test.go | 2 +- pagespeed_exporter.go | 16 +++-- 9 files changed, 218 insertions(+), 70 deletions(-) diff --git a/README.md b/README.md index 91a7a2e..3e9e8c4 100644 --- a/README.md +++ b/README.md @@ -20,6 +20,8 @@ The provided dashboard (Pagespeed) will be loaded with data after the first scra The dashboard can be found at [grafana](https://grafana.com/dashboards/9510) +Note: The example dashboard assumes you're fetching all pagespeed categories. 
+ ## Understanding Metrics * https://github.com/GoogleChrome/lighthouse/blob/master/docs/understanding-results.md @@ -62,12 +64,16 @@ Or via JSON which adds additional parameters // URL can't be invalid // Strategy can only be mobile/desktop // If strategy is not specified, both desktop & mobile will be used +// Categories can be any of accessibility/best-practices/performance/pwa/seo +// If categories are not specified, all categories will be used // Parameters are passed down to google pagespeed api {"url":"https://github.com/foomo/pagespeed_exporter","campaign":"test","locale":"en","source":"source"} {"url":"https://mysite.com/test?test=true","strategy":"mobile"} +{"url":"https://mysite.com/test?test=true","categories": ["best-practices"]} + ``` Configuration specification in JSON and plain is supported both in command line & prometheus configuration @@ -76,15 +82,16 @@ Configuration specification in JSON and plain is supported both in command line Configuration of targets can be done via docker and via prometheus -| Flag | Variable | Description | Default | Required | -|------------------|--------------------|-----------------------------------------------|--------------------|----------| -| -api-key | PAGESPEED_API_KEY | sets the google API key used for pagespeed | | False | -| -targets | PAGESPEED_TARGETS | comma separated list of targets to measure | | False | -| -t | NONE | multi-value target array (check docker comp) | | False | -| -listener | PAGESPEED_LISTENER | sets the listener address for the exporters | :9271 | False | -| -parallel | PAGESPEED_PARALLEL | sets the execution of targets to be parallel | false | False | -| -pushGatewayUrl | PUSHGATEWAY_URL | sets the pushgateway url to send the metrics | | False | -| -pushGatewayJob | PUSHGATEWAY_JOB | sets the pushgateway job name | pagespeed_exporter | False | +| Flag | Variable | Description | Default | Required | 
+|------------------|----------------------|-----------------------------------------------|--------------------------------------------------|----------| +| -api-key | PAGESPEED_API_KEY | sets the google API key used for pagespeed | | False | +| -targets | PAGESPEED_TARGETS | comma separated list of targets to measure | | False | +| -categories | PAGESPEED_CATEGORIES | comma separated list of categories to check | accessibility,best-practices,performance,pwa,seo | False | +| -t | NONE | multi-value target array (check docker comp) | | False | +| -listener | PAGESPEED_LISTENER | sets the listener address for the exporters | :9271 | False | +| -parallel | PAGESPEED_PARALLEL | sets the execution of targets to be parallel | false | False | +| -pushGatewayUrl | PUSHGATEWAY_URL | sets the pushgateway url to send the metrics | | False | +| -pushGatewayJob | PUSHGATEWAY_JOB | sets the pushgateway job name | pagespeed_exporter | False | Note: google api key is required only if scraping more than 2 targets/second @@ -137,6 +144,7 @@ or $ docker run -p "9271:9271" --rm \ --env PAGESPEED_API_KEY={KEY} \ --env PAGESPEED_TARGETS=https://google.com,https://prometheus.io \ + --env PAGESPEED_CATEGORIES=accessibility,pwa \ foomo/pagespeed_exporter ``` diff --git a/collector/collector.go b/collector/collector.go index f407dd8..dbee295 100644 --- a/collector/collector.go +++ b/collector/collector.go @@ -18,6 +18,11 @@ var ( _ Factory = factory{} ) +var ( + timeValueRe = regexp.MustCompile(`(\d*[.]?\d+(ms|s))|0`) + timeUnitRe = regexp.MustCompile(`(ms|s)`) +) + type Factory interface { Create(config Config) (prometheus.Collector, error) } @@ -124,7 +129,7 @@ func collect(scrape *ScrapeResult, ch chan<- prometheus.Metric) error { } if r.LighthouseResult != nil { - collectLighthouseResults("lighthouse", r.LighthouseResult, constLabels, ch) + collectLighthouseResults("lighthouse", scrape.Request.Categories, r.LighthouseResult, constLabels, ch) } return nil } @@ -162,7 +167,7 @@ func 
collectLoadingExperience(prefix string, lexp *pagespeedonline.PagespeedApiL } -func collectLighthouseResults(prefix string, lhr *pagespeedonline.LighthouseResultV5, constLabels prometheus.Labels, ch chan<- prometheus.Metric) { +func collectLighthouseResults(prefix string, cats []string, lhr *pagespeedonline.LighthouseResultV5, constLabels prometheus.Labels, ch chan<- prometheus.Metric) { ch <- prometheus.MustNewConstMetric( prometheus.NewDesc(fqname(prefix, "total_duration_seconds"), "The total time spent in seconds loading the page and evaluating audits.", nil, constLabels), @@ -170,16 +175,17 @@ func collectLighthouseResults(prefix string, lhr *pagespeedonline.LighthouseResu lhr.Timing.Total/1000) //ms -> seconds categories := map[string]*pagespeedonline.LighthouseCategoryV5{ - "performance": lhr.Categories.Performance, - "accessibility": lhr.Categories.Accessibility, - "pwa": lhr.Categories.Pwa, - "best-practices": lhr.Categories.BestPractices, - "seo": lhr.Categories.Seo, + CategoryPerformance: lhr.Categories.Performance, + CategoryAccessibility: lhr.Categories.Accessibility, + CategoryPWA: lhr.Categories.Pwa, + CategoryBestPractices: lhr.Categories.BestPractices, + CategorySEO: lhr.Categories.Seo, } - for k, v := range categories { - score, err := strconv.ParseFloat(fmt.Sprint(v.Score), 64) + for _, c := range cats { + score, err := strconv.ParseFloat(fmt.Sprint(categories[c].Score), 64) if err != nil { + logrus.WithError(err).Warn("could not parse category score") continue } @@ -187,21 +193,18 @@ func collectLighthouseResults(prefix string, lhr *pagespeedonline.LighthouseResu prometheus.NewDesc(fqname(prefix, "category_score"), "Lighthouse score for the specified category", []string{"category"}, constLabels), prometheus.GaugeValue, score, - k) + c) } for k, v := range lhr.Audits { - re := regexp.MustCompile(`(\d*[.]?\d+(ms|s))|0`) - timeRe := regexp.MustCompile(`(ms|s)`) - if timeAuditMetrics[k] { displayValue := strings.Replace(v.DisplayValue, "\u00a0", "", 
-1) displayValue = strings.Replace(displayValue, ",", "", -1) - if !timeRe.MatchString(displayValue) { + if !timeUnitRe.MatchString(displayValue) { displayValue = displayValue + "s" } - if duration, errDuration := time.ParseDuration(re.FindString(displayValue)); errDuration == nil { + if duration, errDuration := time.ParseDuration(timeValueRe.FindString(displayValue)); errDuration == nil { ch <- prometheus.MustNewConstMetric( prometheus.NewDesc(fqname(prefix, k, "duration_seconds"), v.Description, nil, constLabels), prometheus.GaugeValue, diff --git a/collector/model.go b/collector/model.go index 6c6806e..bf09e53 100644 --- a/collector/model.go +++ b/collector/model.go @@ -12,6 +12,12 @@ const ( StrategyMobile = Strategy("mobile") StrategyDesktop = Strategy("desktop") + CategoryAccessibility = "accessibility" + CategoryBestPractices = "best-practices" + CategorySEO = "seo" + CategoryPWA = "pwa" + CategoryPerformance = "performance" + Namespace = "pagespeed" ) @@ -22,17 +28,26 @@ var availableStrategies = map[Strategy]bool{ StrategyDesktop: true, } +var availableCategories = map[string]bool{ + CategoryAccessibility: true, + CategoryBestPractices: true, + CategorySEO: true, + CategoryPWA: true, + CategoryPerformance: true, +} + type ScrapeResult struct { Request ScrapeRequest Result *pagespeedonline.PagespeedApiPagespeedResponseV5 } type ScrapeRequest struct { - Url string `json:"url"` - Strategy Strategy `json:"strategy"` - Campaign string `json:"campaign"` - Source string `json:"source"` - Locale string `json:"locale"` + Url string `json:"url"` + Strategy Strategy `json:"strategy"` + Campaign string `json:"campaign"` + Source string `json:"source"` + Locale string `json:"locale"` + Categories []string `json:"categories"` } func (sr ScrapeRequest) IsValid() bool { @@ -43,6 +58,13 @@ func (sr ScrapeRequest) IsValid() bool { if !availableStrategies[sr.Strategy] { return false } + + for _, c := range sr.Categories { + if !availableCategories[c] { + return false + } + } 
+ if _, err := url.ParseRequestURI(sr.Url); err != nil { return false } @@ -58,7 +80,7 @@ type Config struct { ScrapeTimeout time.Duration } -func CalculateScrapeRequests(targets ...string) []ScrapeRequest { +func CalculateScrapeRequests(targets, categories []string) []ScrapeRequest { if len(targets) == 0 { return nil } @@ -67,6 +89,7 @@ func CalculateScrapeRequests(targets ...string) []ScrapeRequest { for _, t := range targets { var request ScrapeRequest if err := json.Unmarshal([]byte(t), &request); err == nil { + populateCategories(&request, categories) if request.Strategy != "" { requests = append(requests, request) } else { @@ -77,10 +100,11 @@ func CalculateScrapeRequests(targets ...string) []ScrapeRequest { requests = append(requests, desktop, mobile) } } else { - requests = append(requests, - ScrapeRequest{Url: t, Strategy: StrategyDesktop}, - ScrapeRequest{Url: t, Strategy: StrategyMobile}, - ) + desktop := ScrapeRequest{Url: t, Strategy: StrategyDesktop} + mobile := ScrapeRequest{Url: t, Strategy: StrategyMobile} + populateCategories(&desktop, categories) + populateCategories(&mobile, categories) + requests = append(requests, desktop, mobile) } } @@ -94,3 +118,22 @@ func CalculateScrapeRequests(targets ...string) []ScrapeRequest { return filtered } + +// populateCategories sets categories in the scrape request if not already set +func populateCategories(r *ScrapeRequest, cats []string) { + if r.Categories != nil && len(r.Categories) != 0 { + return + } + + if cats == nil { + cats = make([]string, 0, len(availableCategories)) + } + + if len(cats) == 0 { + for c := range availableCategories { + cats = append(cats, c) + } + } + + r.Categories = cats +} diff --git a/collector/model_test.go b/collector/model_test.go index d6a1f36..83aea76 100644 --- a/collector/model_test.go +++ b/collector/model_test.go @@ -2,55 +2,141 @@ package collector import ( "reflect" + "sort" "testing" ) func TestCalculateScrapeRequests(t *testing.T) { + allCategories := 
[]string{"accessibility", "best-practices", "performance", "pwa", "seo"} + tests := []struct { - name string - targets []string - want []ScrapeRequest + name string + targets []string + categories []string + want []ScrapeRequest }{ - {"empty", []string{}, nil}, - {"invalid URL", []string{"url"}, []ScrapeRequest{}}, - {"single basic", []string{"http://test.com"}, []ScrapeRequest{ - {Url: "http://test.com", Strategy: StrategyDesktop}, - {Url: "http://test.com", Strategy: StrategyMobile}, + {"empty", []string{}, nil, nil}, + {"invalid URL", []string{"url"}, nil, []ScrapeRequest{}}, + {"single basic", []string{"http://test.com"}, nil, []ScrapeRequest{ + {Url: "http://test.com", Strategy: StrategyDesktop, Categories: allCategories}, + {Url: "http://test.com", Strategy: StrategyMobile, Categories: allCategories}, + }}, + {"multiple basic", []string{"http://test.com", "http://test2.com"}, nil, []ScrapeRequest{ + {Url: "http://test.com", Strategy: StrategyDesktop, Categories: allCategories}, + {Url: "http://test.com", Strategy: StrategyMobile, Categories: allCategories}, + {Url: "http://test2.com", Strategy: StrategyDesktop, Categories: allCategories}, + {Url: "http://test2.com", Strategy: StrategyMobile, Categories: allCategories}, + }}, + {"single with categories", []string{"http://test.com"}, []string{"accessibility", "pwa"}, []ScrapeRequest{ + {Url: "http://test.com", Strategy: StrategyDesktop, Categories: []string{"accessibility", "pwa"}}, + {Url: "http://test.com", Strategy: StrategyMobile, Categories: []string{"accessibility", "pwa"}}, }}, - {"multiple basic", []string{"http://test.com", "http://test2.com"}, []ScrapeRequest{ - {Url: "http://test.com", Strategy: StrategyDesktop}, - {Url: "http://test.com", Strategy: StrategyMobile}, - {Url: "http://test2.com", Strategy: StrategyDesktop}, - {Url: "http://test2.com", Strategy: StrategyMobile}, + {"multiple with categories", []string{"http://test.com", "http://test2.com"}, []string{"best-practices"}, []ScrapeRequest{ + 
{Url: "http://test.com", Strategy: StrategyDesktop, Categories: []string{"best-practices"}}, + {Url: "http://test.com", Strategy: StrategyMobile, Categories: []string{"best-practices"}}, + {Url: "http://test2.com", Strategy: StrategyDesktop, Categories: []string{"best-practices"}}, + {Url: "http://test2.com", Strategy: StrategyMobile, Categories: []string{"best-practices"}}, }}, + {"single with wrong categories", []string{"http://test.com"}, []string{"accessibility", "pancake"}, []ScrapeRequest{}}, + {"multiple with wrong categories", []string{"http://test.com", "http://test2.com"}, []string{"accessibility", "pancake"}, []ScrapeRequest{}}, {"json", - []string{`{"url":"http://test.com","strategy":"desktop","campaign":"campaign","source":"source","locale":"locale"}`}, []ScrapeRequest{ + []string{`{"url":"http://test.com","strategy":"desktop","campaign":"campaign","source":"source","locale":"locale"}`}, nil, []ScrapeRequest{ + { + Url: "http://test.com", + Strategy: StrategyDesktop, + Campaign: "campaign", + Source: "source", + Locale: "locale", + Categories: allCategories, + }, + }}, + {"json with category", + []string{`{"url":"http://test.com","strategy":"desktop","campaign":"campaign","source":"source","locale":"locale", "categories":["pwa"]}`}, nil, []ScrapeRequest{ { - Url: "http://test.com", - Strategy: StrategyDesktop, - Campaign: "campaign", - Source: "source", - Locale: "locale", + Url: "http://test.com", + Strategy: StrategyDesktop, + Campaign: "campaign", + Source: "source", + Locale: "locale", + Categories: []string{"pwa"}, }, }}, + {"json with wrong category", + []string{`{"url":"http://test.com","strategy":"desktop","campaign":"campaign","source":"source","locale":"locale", "categories":["waffle"]}`}, nil, []ScrapeRequest{}}, {"json simple", - []string{`{"url":"http://test.com"}`}, []ScrapeRequest{ - {Url: "http://test.com", Strategy: StrategyDesktop}, - {Url: "http://test.com", Strategy: StrategyMobile}, + []string{`{"url":"http://test.com"}`}, nil, 
[]ScrapeRequest{ + {Url: "http://test.com", Strategy: StrategyDesktop, Categories: allCategories}, + {Url: "http://test.com", Strategy: StrategyMobile, Categories: allCategories}, }}, {"json missing URL", - []string{`{"strategy":"desktop"}`}, + []string{`{"strategy":"desktop"}`}, nil, []ScrapeRequest{}}, {"json bad strategy", - []string{`{"url":"http://test.com","strategy":"microwave"}`}, + []string{`{"url":"http://test.com","strategy":"microwave"}`}, nil, []ScrapeRequest{}, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - got := CalculateScrapeRequests(tt.targets...) + got := CalculateScrapeRequests(tt.targets, tt.categories) + + // To be able to reflect.DeepEqual on the categories slice + for _, r := range got { + sort.Strings(r.Categories) + } if !reflect.DeepEqual(got, tt.want) { - t.Errorf("CalculateScrapeRequests() = %v, want %v", got, tt.want) + t.Errorf("CalculateScrapeRequests() = %+v, want %+v", got, tt.want) + } + }) + } +} + +func TestPopulateCategories(t *testing.T) { + allCategories := []string{"accessibility", "best-practices", "performance", "pwa", "seo"} + + tests := []struct { + msg string + req *ScrapeRequest + cats []string + want *ScrapeRequest + }{ + { + "request is not changed if categories exist", + &ScrapeRequest{ + Categories: []string{"performance"}, + }, + []string{"best-practices"}, + &ScrapeRequest{ + Categories: []string{"performance"}, + }, + }, + { + "available categories set if request has no categories", + &ScrapeRequest{}, + nil, + &ScrapeRequest{ + Categories: allCategories, + }, + }, + { + "input categories are set in request", + &ScrapeRequest{}, + []string{"best-practices", "pancake", "pwa"}, + &ScrapeRequest{ + Categories: []string{"best-practices", "pancake", "pwa"}, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.msg, func(t *testing.T) { + populateCategories(tt.req, tt.cats) + + // To be able to reflect.DeepEqual on the categories slice + sort.Strings(tt.req.Categories) + + if 
!reflect.DeepEqual(tt.req, tt.want) { + t.Errorf("populateCategories() = %+v, want %+v", tt.req, tt.want) } }) } diff --git a/collector/scrape.go b/collector/scrape.go index d7523db..d1f320b 100644 --- a/collector/scrape.go +++ b/collector/scrape.go @@ -112,7 +112,7 @@ func (pss pagespeedScrapeService) scrape(request ScrapeRequest) (scrape *ScrapeR } call := service.Pagespeedapi.Runpagespeed(request.Url) - call.Category("performance", "seo", "pwa", "best-practices", "accessibility") + call.Category(request.Categories...) call.Strategy(string(request.Strategy)) if request.Campaign != "" { diff --git a/collector/scrape_test.go b/collector/scrape_test.go index e79e836..21d3eb2 100644 --- a/collector/scrape_test.go +++ b/collector/scrape_test.go @@ -37,7 +37,7 @@ func Test_PagespeedScrapeService(t *testing.T) { t.Fatalf("newPagespeedScrapeService should not throw an error: %v", err) } - scrapes, err := service.Scrape(true, CalculateScrapeRequests("http://example.com/")) + scrapes, err := service.Scrape(true, CalculateScrapeRequests([]string{"http://example.com/"}, nil)) if err != nil { t.Fatal("scrape should not throw an error") } diff --git a/handler/probe.go b/handler/probe.go index bdb39fb..fd56604 100644 --- a/handler/probe.go +++ b/handler/probe.go @@ -28,9 +28,10 @@ type httpProbeHandler struct { collectorFactory collector.Factory pushGatewayUrl string pushGatewayJob string + categories []string } -func NewProbeHandler(credentialsFile string, apiKey string, parallel bool, factory collector.Factory, pushGatewayUrl string, pushGatewayJob string) http.Handler { +func NewProbeHandler(credentialsFile string, apiKey string, parallel bool, factory collector.Factory, pushGatewayUrl string, pushGatewayJob string, categories []string) http.Handler { return httpProbeHandler{ credentialsFile: credentialsFile, googleAPIKey: apiKey, @@ -38,6 +39,7 @@ func NewProbeHandler(credentialsFile string, apiKey string, parallel bool, facto collectorFactory: factory, pushGatewayUrl: 
pushGatewayUrl, pushGatewayJob: pushGatewayJob, + categories: categories, } } @@ -50,7 +52,7 @@ func (ph httpProbeHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { log.WithField("target", target).Info("probe requested for target") } - requests := collector.CalculateScrapeRequests(targets...) + requests := collector.CalculateScrapeRequests(targets, ph.categories) if len(requests) == 0 { http.Error(w, "Probe requires at least one target", http.StatusBadRequest) return diff --git a/handler/probe_test.go b/handler/probe_test.go index 99a5b63..eadf1da 100644 --- a/handler/probe_test.go +++ b/handler/probe_test.go @@ -33,7 +33,7 @@ func (mockCollector) Create(config collector.Config) (prometheus.Collector, erro } func TestProbeHandler(t *testing.T) { - handler := NewProbeHandler("", "KEY", false, mockCollector{}, "", "") + handler := NewProbeHandler("", "KEY", false, mockCollector{}, "", "", []string{"performance"}) require.NotNil(t, handler) require.HTTPSuccess(t, handler.ServeHTTP, "GET", "/probe", map[string][]string{"target": {"http://test.com"}}) diff --git a/pagespeed_exporter.go b/pagespeed_exporter.go index 5427f86..d46e15b 100755 --- a/pagespeed_exporter.go +++ b/pagespeed_exporter.go @@ -3,6 +3,7 @@ package main import ( "flag" "net/http" + "os" "strings" "github.com/foomo/pagespeed_exporter/collector" @@ -10,8 +11,6 @@ import ( "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promhttp" log "github.com/sirupsen/logrus" - - "os" ) var ( @@ -21,6 +20,7 @@ var ( googleApiKey string listenerAddress string targets arrayFlags + categories arrayFlags parallel bool pushGatewayUrl string pushGatewayJob string @@ -40,12 +40,12 @@ func (i *arrayFlags) Set(value string) error { func main() { parseFlags() - log.Infof("starting pagespeed exporter version %s on address %s for %d targets", Version, listenerAddress, len(targets)) + log.Infof("starting pagespeed exporter version %s on address %s for %d targets and %d 
categories", Version, listenerAddress, len(targets), len(categories)) collectorFactory := collector.NewFactory() // Register prometheus target collectors only if there is more than one target if len(targets) > 0 { - requests := collector.CalculateScrapeRequests(targets...) + requests := collector.CalculateScrapeRequests(targets, categories) psc, errCollector := collectorFactory.Create(collector.Config{ ScrapeRequests: requests, @@ -62,7 +62,7 @@ func main() { mux := http.NewServeMux() mux.Handle("/", handler.NewIndexHandler()) mux.Handle("/metrics", promhttp.Handler()) - mux.Handle("/probe", handler.NewProbeHandler(credentialsFile, googleApiKey, parallel, collectorFactory, pushGatewayUrl, pushGatewayJob)) + mux.Handle("/probe", handler.NewProbeHandler(credentialsFile, googleApiKey, parallel, collectorFactory, pushGatewayUrl, pushGatewayJob, categories)) server := http.Server{ Addr: listenerAddress, @@ -80,6 +80,7 @@ func parseFlags() { flag.StringVar(&pushGatewayUrl, "pushGatewayUrl", getenv("PUSHGATEWAY_URL", ""), "sets the push gateway to send the metrics. leave empty to ignore it") flag.StringVar(&pushGatewayJob, "pushGatewayJob", getenv("PUSHGATEWAY_JOB", "pagespeed_exporter"), "sets push gateway job name") targetsFlag := flag.String("targets", getenv("PAGESPEED_TARGETS", ""), "comma separated list of targets to measure") + categoriesFlag := flag.String("categories", getenv("PAGESPEED_CATEGORIES", "accessibility,best-practices,performance,pwa,seo"), "comma separated list of categories. overridden by categories in JSON targets") flag.Var(&targets, "t", "multiple argument parameters") flag.Parse() @@ -88,6 +89,11 @@ func parseFlags() { targets = append(targets, additionalTargets...) } + if *categoriesFlag != "" { + additionalCategories := strings.Split(*categoriesFlag, ",") + categories = append(categories, additionalCategories...) + } + if len(targets) == 0 || targets[0] == "" { log.Info("no targets specified, listening from collector") }