From f211e0fe31716f7a88570171e4cc939f90bf6762 Mon Sep 17 00:00:00 2001 From: stefanprodan Date: Sun, 31 Mar 2019 13:55:14 +0300 Subject: [PATCH] Use go templates to render the builtin promql queries --- pkg/controller/scheduler.go | 4 +- pkg/metrics/envoy.go | 65 +++++++++++++++++ pkg/metrics/envoy_test.go | 28 ++++++++ pkg/metrics/istio.go | 123 +++++++++++++++++++++++++++++++ pkg/metrics/istio_test.go | 51 +++++++++++++ pkg/metrics/observer.go | 135 +++++++---------------------------- pkg/metrics/observer_test.go | 4 +- 7 files changed, 297 insertions(+), 113 deletions(-) create mode 100644 pkg/metrics/envoy.go create mode 100644 pkg/metrics/envoy_test.go create mode 100644 pkg/metrics/istio.go create mode 100644 pkg/metrics/istio_test.go diff --git a/pkg/controller/scheduler.go b/pkg/controller/scheduler.go index 892fc41f7..4c699f737 100644 --- a/pkg/controller/scheduler.go +++ b/pkg/controller/scheduler.go @@ -513,7 +513,7 @@ func (c *Controller) analyseCanary(r *flaggerv1.Canary) bool { } if metric.Name == "istio_requests_total" { - val, err := c.observer.GetDeploymentCounter(r.Spec.TargetRef.Name, r.Namespace, metric.Name, metric.Interval) + val, err := c.observer.GetIstioSuccessRate(r.Spec.TargetRef.Name, r.Namespace, metric.Name, metric.Interval) if err != nil { if strings.Contains(err.Error(), "no values found") { c.recordEventWarningf(r, "Halt advancement no values found for metric %s probably %s.%s is not receiving traffic", @@ -531,7 +531,7 @@ func (c *Controller) analyseCanary(r *flaggerv1.Canary) bool { } if metric.Name == "istio_request_duration_seconds_bucket" { - val, err := c.observer.GetDeploymentHistogram(r.Spec.TargetRef.Name, r.Namespace, metric.Name, metric.Interval) + val, err := c.observer.GetIstioRequestDuration(r.Spec.TargetRef.Name, r.Namespace, metric.Name, metric.Interval) if err != nil { c.recordEventErrorf(r, "Metrics server %s query failed: %v", c.observer.GetMetricsServer(), err) return false diff --git a/pkg/metrics/envoy.go b/pkg/metrics/envoy.go new file mode 100644 index 000000000..04002bb71 --- /dev/null +++ b/pkg/metrics/envoy.go @@ -0,0 +1,65 @@ +package metrics + +import ( + "fmt" + "net/url" + "strconv" +) + +const envoySuccessRateQuery = ` +sum(rate( +envoy_cluster_upstream_rq{kubernetes_namespace="{{ .Namespace }}", +kubernetes_pod_name=~"{{ .Name }}-[0-9a-zA-Z]+(-[0-9a-zA-Z]+)", +envoy_response_code!~"5.*"} +[{{ .Interval }}])) +/ +sum(rate( +envoy_cluster_upstream_rq{kubernetes_namespace="{{ .Namespace }}", +kubernetes_pod_name=~"{{ .Name }}-[0-9a-zA-Z]+(-[0-9a-zA-Z]+)"} +[{{ .Interval }}])) +* 100 +` + +func (c *Observer) GetEnvoySuccessRate(name string, namespace string, metric string, interval string) (float64, error) { + if c.metricsServer == "fake" { + return 100, nil + } + + meta := struct { + Name string + Namespace string + Interval string + }{ + name, + namespace, + interval, + } + + query, err := render(meta, envoySuccessRateQuery) + if err != nil { + return 0, err + } + + var rate *float64 + querySt := url.QueryEscape(query) + result, err := c.queryMetric(querySt) + if err != nil { + return 0, err + } + + for _, v := range result.Data.Result { + metricValue := v.Value[1] + switch metricValue.(type) { + case string: + f, err := strconv.ParseFloat(metricValue.(string), 64) + if err != nil { + return 0, err + } + rate = &f + } + } + if rate == nil { + return 0, fmt.Errorf("no values found for metric %s", metric) + } + return *rate, nil +} diff --git a/pkg/metrics/envoy_test.go b/pkg/metrics/envoy_test.go new file mode 100644 index 000000000..12d935708 --- /dev/null +++ b/pkg/metrics/envoy_test.go @@ -0,0 +1,28 @@ +package metrics + +import ( + "testing" +) + +func Test_EnvoySuccessRateQueryRender(t *testing.T) { + meta := struct { + Name string + Namespace string + Interval string + }{ + "podinfo", + "default", + "1m", + } + + query, err := render(meta, envoySuccessRateQuery) + if err != nil { + t.Fatal(err) + } + + expected := `sum(rate(envoy_cluster_upstream_rq{kubernetes_namespace="default",kubernetes_pod_name=~"podinfo-[0-9a-zA-Z]+(-[0-9a-zA-Z]+)",envoy_response_code!~"5.*"}[1m])) / sum(rate(envoy_cluster_upstream_rq{kubernetes_namespace="default",kubernetes_pod_name=~"podinfo-[0-9a-zA-Z]+(-[0-9a-zA-Z]+)"}[1m])) * 100` + + if query != expected { + t.Errorf("\nGot %s \nWanted %s", query, expected) + } +} diff --git a/pkg/metrics/istio.go b/pkg/metrics/istio.go new file mode 100644 index 000000000..5c8e9855d --- /dev/null +++ b/pkg/metrics/istio.go @@ -0,0 +1,123 @@ +package metrics + +import ( + "fmt" + "net/url" + "strconv" + "time" +) + +const istioSuccessRateQuery = ` +sum(rate( +istio_requests_total{reporter="destination", +destination_workload_namespace="{{ .Namespace }}", +destination_workload=~"{{ .Name }}", +response_code!~"5.*"} +[{{ .Interval }}])) +/ +sum(rate( +istio_requests_total{reporter="destination", +destination_workload_namespace="{{ .Namespace }}", +destination_workload=~"{{ .Name }}"} +[{{ .Interval }}])) +* 100 +` + +// GetIstioSuccessRate returns the requests success rate (non 5xx) using istio_requests_total metric +func (c *Observer) GetIstioSuccessRate(name string, namespace string, metric string, interval string) (float64, error) { + if c.metricsServer == "fake" { + return 100, nil + } + + meta := struct { + Name string + Namespace string + Interval string + }{ + name, + namespace, + interval, + } + + query, err := render(meta, istioSuccessRateQuery) + if err != nil { + return 0, err + } + + var rate *float64 + querySt := url.QueryEscape(query) + result, err := c.queryMetric(querySt) + if err != nil { + return 0, err + } + + for _, v := range result.Data.Result { + metricValue := v.Value[1] + switch metricValue.(type) { + case string: + f, err := strconv.ParseFloat(metricValue.(string), 64) + if err != nil { + return 0, err + } + rate = &f + } + } + if rate == nil { + return 0, fmt.Errorf("no values found for metric %s", metric) + } + return *rate, nil +} + +const istioRequestDurationQuery = ` +histogram_quantile(0.99, sum(rate( +istio_request_duration_seconds_bucket{reporter="destination", +destination_workload_namespace="{{ .Namespace }}", +destination_workload=~"{{ .Name }}"} +[{{ .Interval }}])) by (le)) +` + +// GetIstioRequestDuration returns the 99P requests delay using istio_request_duration_seconds_bucket metrics +func (c *Observer) GetIstioRequestDuration(name string, namespace string, metric string, interval string) (time.Duration, error) { + if c.metricsServer == "fake" { + return 1, nil + } + + meta := struct { + Name string + Namespace string + Interval string + }{ + name, + namespace, + interval, + } + + query, err := render(meta, istioRequestDurationQuery) + if err != nil { + return 0, err + } + + var rate *float64 + querySt := url.QueryEscape(query) + result, err := c.queryMetric(querySt) + if err != nil { + return 0, err + } + + for _, v := range result.Data.Result { + metricValue := v.Value[1] + switch metricValue.(type) { + case string: + f, err := strconv.ParseFloat(metricValue.(string), 64) + if err != nil { + return 0, err + } + rate = &f + } + } + if rate == nil { + return 0, fmt.Errorf("no values found for metric %s", metric) + } + ms := time.Duration(int64(*rate*1000)) * time.Millisecond + return ms, nil +} diff --git a/pkg/metrics/istio_test.go b/pkg/metrics/istio_test.go new file mode 100644 index 000000000..28826a776 --- /dev/null +++ b/pkg/metrics/istio_test.go @@ -0,0 +1,51 @@ +package metrics + +import ( + "testing" +) + +func Test_IstioSuccessRateQueryRender(t *testing.T) { + meta := struct { + Name string + Namespace string + Interval string + }{ + "podinfo", + "default", + "1m", + } + + query, err := render(meta, istioSuccessRateQuery) + if err != nil { + t.Fatal(err) + } + + expected := `sum(rate(istio_requests_total{reporter="destination",destination_workload_namespace="default",destination_workload=~"podinfo",response_code!~"5.*"}[1m])) / sum(rate(istio_requests_total{reporter="destination",destination_workload_namespace="default",destination_workload=~"podinfo"}[1m])) * 100` + + if query != expected { + t.Errorf("\nGot %s \nWanted %s", query, expected) + } +} + +func Test_IstioRequestDurationQueryRender(t *testing.T) { + meta := struct { + Name string + Namespace string + Interval string + }{ + "podinfo", + "default", + "1m", + } + + query, err := render(meta, istioRequestDurationQuery) + if err != nil { + t.Fatal(err) + } + + expected := `histogram_quantile(0.99, sum(rate(istio_request_duration_seconds_bucket{reporter="destination",destination_workload_namespace="default",destination_workload=~"podinfo"}[1m])) by (le))` + + if query != expected { + t.Errorf("\nGot %s \nWanted %s", query, expected) + } +} diff --git a/pkg/metrics/observer.go b/pkg/metrics/observer.go index 7eaacd4d2..ddb871199 100644 --- a/pkg/metrics/observer.go +++ b/pkg/metrics/observer.go @@ -1,6 +1,8 @@ package metrics import ( + "bufio" + "bytes" "context" "encoding/json" "fmt" @@ -9,6 +11,7 @@ import ( "net/url" "strconv" "strings" + "text/template" "time" ) @@ -29,12 +32,14 @@ type vectorQueryResponse struct { } } +// NewObserver creates a new observer func NewObserver(metricsServer string) Observer { return Observer{ metricsServer: metricsServer, } } +// GetMetricsServer returns the Prometheus URL func (c *Observer) GetMetricsServer() string { return c.metricsServer } @@ -116,115 +121,6 @@ func (c *Observer) GetScalar(query string) (float64, error) { return *value, nil } -func (c *Observer) GetEnvoySuccessRate(name string, namespace string, metric string, interval string) (float64, error) { - if c.metricsServer == "fake" { - return 100, nil - } - - var rate *float64 - querySt := url.QueryEscape(`sum(rate(` + - metric + `{kubernetes_namespace="` + - namespace + `",kubernetes_pod_name=~"` + - name + `-[0-9a-zA-Z]+(-[0-9a-zA-Z]+)",envoy_response_code!~"5.*"}[` + - interval + `])) / sum(rate(` + - metric + `{kubernetes_namespace="` + - namespace + `",kubernetes_pod_name=~"` + - name + `-[0-9a-zA-Z]+(-[0-9a-zA-Z]+)"}[` + - interval + `])) * 100 `) - result, err := c.queryMetric(querySt) - if err != nil { - return 0, err - } - - for _, v := range result.Data.Result { - metricValue := v.Value[1] - switch metricValue.(type) { - case string: - f, err := strconv.ParseFloat(metricValue.(string), 64) - if err != nil { - return 0, err - } - rate = &f - } - } - if rate == nil { - return 0, fmt.Errorf("no values found for metric %s", metric) - } - return *rate, nil -} - -// GetDeploymentCounter returns the requests success rate using istio_requests_total metric -func (c *Observer) GetDeploymentCounter(name string, namespace string, metric string, interval string) (float64, error) { - if c.metricsServer == "fake" { - return 100, nil - } - - var rate *float64 - querySt := url.QueryEscape(`sum(rate(` + - metric + `{reporter="destination",destination_workload_namespace=~"` + - namespace + `",destination_workload=~"` + - name + `",response_code!~"5.*"}[` + - interval + `])) / sum(rate(` + - metric + `{reporter="destination",destination_workload_namespace=~"` + - namespace + `",destination_workload=~"` + - name + `"}[` + - interval + `])) * 100 `) - result, err := c.queryMetric(querySt) - if err != nil { - return 0, err - } - - for _, v := range result.Data.Result { - metricValue := v.Value[1] - switch metricValue.(type) { - case string: - f, err := strconv.ParseFloat(metricValue.(string), 64) - if err != nil { - return 0, err - } - rate = &f - } - } - if rate == nil { - return 0, fmt.Errorf("no values found for metric %s", metric) - } - return *rate, nil -} - -// GetDeploymentHistogram returns the 99P requests delay using istio_request_duration_seconds_bucket metrics -func (c *Observer) GetDeploymentHistogram(name string, namespace string, metric string, interval string) (time.Duration, error) { - if c.metricsServer == "fake" { - return 1, nil - } - var rate *float64 - querySt := url.QueryEscape(`histogram_quantile(0.99, sum(rate(` + - metric + `{reporter="destination",destination_workload=~"` + - name + `", destination_workload_namespace=~"` + - namespace + `"}[` + - interval + `])) by (le))`) - result, err := c.queryMetric(querySt) - if err != nil { - return 0, err - } - - for _, v := range result.Data.Result { - metricValue := v.Value[1] - switch metricValue.(type) { - case string: - f, err := strconv.ParseFloat(metricValue.(string), 64) - if err != nil { - return 0, err - } - rate = &f - } - } - if rate == nil { - return 0, fmt.Errorf("no values found for metric %s", metric) - } - ms := time.Duration(int64(*rate*1000)) * time.Millisecond - return ms, nil -} - // CheckMetricsServer call Prometheus status endpoint and returns an error if // the API is unreachable func CheckMetricsServer(address string) (bool, error) { @@ -265,3 +161,24 @@ func CheckMetricsServer(address string) (bool, error) { return true, nil } + +func render(meta interface{}, tmpl string) (string, error) { + t, err := template.New("tmpl").Parse(tmpl) + if err != nil { + return "", err + } + var data bytes.Buffer + b := bufio.NewWriter(&data) + + if err := t.Execute(b, meta); err != nil { + return "", err + } + err = b.Flush() + if err != nil { + return "", err + } + + res := strings.ReplaceAll(data.String(), "\n", "") + + return res, nil +} diff --git a/pkg/metrics/observer_test.go b/pkg/metrics/observer_test.go index ac961a318..1cd4cb876 100644 --- a/pkg/metrics/observer_test.go +++ b/pkg/metrics/observer_test.go @@ -18,7 +18,7 @@ func TestCanaryObserver_GetDeploymentCounter(t *testing.T) { metricsServer: ts.URL, } - val, err := observer.GetDeploymentCounter("podinfo", "default", "istio_requests_total", "1m") + val, err := observer.GetIstioSuccessRate("podinfo", "default", "istio_requests_total", "1m") if err != nil { t.Fatal(err.Error()) } @@ -40,7 +40,7 @@ func TestCanaryObserver_GetDeploymentHistogram(t *testing.T) { metricsServer: ts.URL, } - val, err := observer.GetDeploymentHistogram("podinfo", "default", "istio_request_duration_seconds_bucket", "1m") + val, err := observer.GetIstioRequestDuration("podinfo", "default", "istio_request_duration_seconds_bucket", "1m") if err != nil { t.Fatal(err.Error()) }