diff --git a/manifests/charts/istioconfig/values.yaml b/manifests/charts/istioconfig/values.yaml
index 97f137922f..dfe3901404 100644
--- a/manifests/charts/istioconfig/values.yaml
+++ b/manifests/charts/istioconfig/values.yaml
@@ -6,5 +6,5 @@ agentservice:
   port: 80
 otlpPort: 4317
-authzGrpcTimeout: 0.01s
+authzGrpcTimeout: 0.5s
 maxRequestBytes: 8192
diff --git a/manifests/k8s/tanka/environments/tilt/apps/demoapp/mixins.libsonnet b/manifests/k8s/tanka/environments/tilt/apps/demoapp/mixins.libsonnet
index ae1109bba3..1bf3743c58 100644
--- a/manifests/k8s/tanka/environments/tilt/apps/demoapp/mixins.libsonnet
+++ b/manifests/k8s/tanka/environments/tilt/apps/demoapp/mixins.libsonnet
@@ -3,6 +3,11 @@ local k = import 'github.com/jsonnet-libs/k8s-libsonnet/1.22/main.libsonnet';
 local demoApp = import 'apps/demoapp/main.libsonnet';
 local latencyGradientPolicy = import 'github.com/fluxninja/aperture-blueprints/lib/1.0/policies/latency-gradient.libsonnet';
+local aperture = import 'github.com/fluxninja/aperture/libsonnet/1.0/main.libsonnet';
+
+local Workload = aperture.v1.SchedulerWorkload;
+local LabelMatcher = aperture.v1.LabelMatcher;
+local WorkloadWithLabelMatcher = aperture.v1.SchedulerWorkloadAndLabelMatcher;
 
 local demoappMixin =
   demoApp {
@@ -32,6 +37,22 @@ local policy = latencyGradientPolicy({
   serviceSelector+: {
     service: 'service1-demo-app.demoapp.svc.cluster.local',
   },
+  concurrencyLimiter+: {
+    defaultWorkload: {
+      priority: 20,
+      timeout: '0.025s',
+    },
+    workloads: [
+      WorkloadWithLabelMatcher.new(
+        workload=Workload.withPriority(50) + Workload.withTimeout('0.025s'),
+        label_matcher=LabelMatcher.withMatchLabels({ 'request_header_user-type': 'guest' })
+      ),
+      WorkloadWithLabelMatcher.new(
+        workload=Workload.withPriority(200) + Workload.withTimeout('0.025s'),
+        label_matcher=LabelMatcher.withMatchLabels({ 'request_header_user-type': 'subscriber' })
+      ),
+    ],
+  },
 }).policy;
 
 {
diff --git a/manifests/k8s/tanka/jsonnetfile.lock.json b/manifests/k8s/tanka/jsonnetfile.lock.json
index 6970790c83..09bbc717d0 100644
--- a/manifests/k8s/tanka/jsonnetfile.lock.json
+++ b/manifests/k8s/tanka/jsonnetfile.lock.json
@@ -8,8 +8,8 @@
         "subdir": ""
       }
     },
-    "version": "f41e919200a8283c21d8d3669caab6505e72972d",
-    "sum": "EZe68l9CbQtOwVyJQd7XezZcwE8zXZtjNi+s5MN3WCg="
+    "version": "99977877059d086c9a4253b26778fc567f381e52",
+    "sum": "eF4lEXF1Wq8/jXEhIEPK+zjATFGjcju+guoad5sflrk="
   },
   {
     "source": {
@@ -18,8 +18,8 @@
         "subdir": "lib/1.0"
       }
     },
-    "version": "f41e919200a8283c21d8d3669caab6505e72972d",
-    "sum": "VZw1LdVqxkqL6mbWTHVPWfzcjrkkM8FyDkmv2HoJddc="
+    "version": "99977877059d086c9a4253b26778fc567f381e52",
+    "sum": "cQ5J2PMhqaw3NqkMAEml9ocF86HAvwgOS7pxVeBrvJY="
   },
   {
     "source": {
@@ -28,7 +28,7 @@
         "subdir": "libsonnet/1.0"
       }
     },
-    "version": "7ac610333b11916c02915943c5e9953c5ac111de",
+    "version": "5408d30c44d75de5fec6a4c7de66023e753eb00c",
     "sum": "CEvwNmmpYPp6Yp1F0mqO1tTpTcaKUvnWge5Vh1G64k0="
   },
   {
diff --git a/manifests/k8s/tanka/lib/apps/aperture-grafana/main.libsonnet b/manifests/k8s/tanka/lib/apps/aperture-grafana/main.libsonnet
index ffa66e1a0e..3a6dd5ddab 100644
--- a/manifests/k8s/tanka/lib/apps/aperture-grafana/main.libsonnet
+++ b/manifests/k8s/tanka/lib/apps/aperture-grafana/main.libsonnet
@@ -1,7 +1,7 @@
 local grafanaOperator = import 'github.com/jsonnet-libs/grafana-operator-libsonnet/4.3/main.libsonnet';
 local kubernetesMixin = import 'github.com/kubernetes-monitoring/kubernetes-mixin/mixin.libsonnet';
-local decisionDashboard = import 'github.com/fluxninja/aperture-blueprints/lib/1.0/dashboards/decision.libsonnet';
+local policyDashboard = import 'github.com/fluxninja/aperture-blueprints/lib/1.0/dashboards/latency-gradient.libsonnet';
 
 local grafana = grafanaOperator.integreatly.v1alpha1.grafana;
 local dashboard = grafanaOperator.integreatly.v1alpha1.grafanaDashboard;
@@ -40,7 +40,7 @@ local dashboards = [
   dashboard.new('example-dashboard') +
   dashboard.metadata.withLabels({ 'fluxninja.com/grafana-instance': 'aperture-grafana' }) +
-  dashboard.spec.withJson(std.manifestJsonEx(decisionDashboard({
+  dashboard.spec.withJson(std.manifestJsonEx(policyDashboard({
     policyName: 'service1-demo-app',
   }).dashboard, indent=' ')) +
   dashboard.spec.withDatasources({
diff --git a/pkg/flowcontrol/common/metrics.go b/pkg/flowcontrol/common/metrics.go
index 25b21820e0..145fd7b027 100644
--- a/pkg/flowcontrol/common/metrics.go
+++ b/pkg/flowcontrol/common/metrics.go
@@ -48,25 +48,25 @@ func NewPrometheusMetrics(registry *prometheus.Registry) (*PrometheusMetrics, er
 		registry: registry,
 		checkReceivedTotal: prometheus.NewCounter(
 			prometheus.CounterOpts{
-				Name: metrics.FlowControlCheckRequestsMetricName,
+				Name: metrics.FlowControlRequestsMetricName,
 				Help: "Total number of aperture check requests handled",
 			},
 		),
 		checkDecision: *prometheus.NewCounterVec(
 			prometheus.CounterOpts{
-				Name: metrics.FlowControlCheckDecisionsMetricName,
+				Name: metrics.FlowControlDecisionsMetricName,
 				Help: "Number of aperture check decisions",
 			},
 			[]string{metrics.FlowControlCheckDecisionTypeLabel},
 		),
 		errorReason: *prometheus.NewCounterVec(
 			prometheus.CounterOpts{
-				Name: metrics.FlowControlCheckErrorReasonMetricName,
+				Name: metrics.FlowControlErrorReasonMetricName,
 				Help: "Number of error reasons other than unspecified",
 			},
 			[]string{metrics.FlowControlCheckErrorReasonLabel},
 		),
 		rejectReason: *prometheus.NewCounterVec(
 			prometheus.CounterOpts{
-				Name: metrics.FlowControlCheckRejectReasonMetricName,
+				Name: metrics.FlowControlRejectReasonMetricName,
 				Help: "Number of reject reasons other than unspecified",
 			},
 			[]string{metrics.FlowControlCheckRejectReasonLabel},
 		),
diff --git a/pkg/metrics/schema.go b/pkg/metrics/schema.go
index d4222cf136..bb370f9db9 100644
--- a/pkg/metrics/schema.go
+++ b/pkg/metrics/schema.go
@@ -27,30 +27,22 @@ const (
 	WFQFlowsMetricName = "wfq_flows"
 	// WFQRequestsMetricName - weighted fair queuing number of requests gauge.
 	WFQRequestsMetricName = "wfq_requests"
-	// FlowControlCheckRequestsMetricName - counter for Check requests for flowcontrol.
-	FlowControlCheckRequestsMetricName = "flowcontrol_check_requests_total"
-	// FlowControlCheckDecisionsMetricName - counter for Check requests per decision type.
-	FlowControlCheckDecisionsMetricName = "flowcontrol_check_decisions_total"
-	// FlowControlCheckErrorReasonMetricName - metric for error reason on FCS Check requests.
-	FlowControlCheckErrorReasonMetricName = "flowcontrol_check_error_reason_total"
-	// FlowControlCheckRejectReasonMetricName - metric for reject reason on FCS Check requests.
-	FlowControlCheckRejectReasonMetricName = "flowcontrol_check_reject_reason_total"
+	// FlowControlRequestsMetricName - counter for Check requests for flowcontrol.
+	FlowControlRequestsMetricName = "flowcontrol_requests_count"
+	// FlowControlDecisionsMetricName - counter for Check requests per decision type.
+	FlowControlDecisionsMetricName = "flowcontrol_decisions_count"
+	// FlowControlErrorReasonMetricName - metric for error reason on FCS Check requests.
+	FlowControlErrorReasonMetricName = "flowcontrol_error_reason_count"
+	// FlowControlRejectReasonMetricName - metric for reject reason on FCS Check requests.
+	FlowControlRejectReasonMetricName = "flowcontrol_reject_reason_count"
 	// TokenBucketMetricName - a gauge that tracks the load shed factor.
 	TokenBucketMetricName = "token_bucket_lsf"
 	// TokenBucketFillRateMetricName - a gauge that tracks the fill rate of token bucket.
-	TokenBucketFillRateMetricName = "token_bucket_bucket_fill_rate"
+	TokenBucketFillRateMetricName = "token_bucket_fill_rate"
 	// TokenBucketCapacityMetricName - a gauge that tracks the capacity of token bucket.
-	TokenBucketCapacityMetricName = "token_bucket_bucket_capacity"
+	TokenBucketCapacityMetricName = "token_bucket_capacity"
 	// TokenBucketAvailableMetricName - a gauge that tracks the number of tokens available in token bucket.
 	TokenBucketAvailableMetricName = "token_bucket_available_tokens"
-	// GroupJobRegisteredMetricName - current number of group job registered.
-	GroupJobRegisteredMetricName = "group_job_registered_number"
-	// GroupJobScheduledMetricName - current number of group job scheduled.
-	GroupJobScheduledMetricName = "group_job_scheduled_number"
-	// GroupJobCompletedMetricName - total number of group job completed.
-	GroupJobCompletedMetricName = "group_job_completed_total"
-	// GroupJobLatencyMetricName - the latency of the group jobs.
-	GroupJobLatencyMetricName = "group_job_latency_seconds"
 
 	// PROMETHEUS LABELS.
 
@@ -75,11 +67,11 @@ const (
 	// ResponseStatusCodeLabel - label from response status code.
 	ResponseStatusCodeLabel = "response_status_code"
 	// FlowControlCheckDecisionTypeLabel - label for decision type dropped or accepted.
-	FlowControlCheckDecisionTypeLabel = "flowcontrol_check_decision_type"
+	FlowControlCheckDecisionTypeLabel = "decision_type"
 	// FlowControlCheckErrorReasonLabel - label for error reason on FCS Check request.
-	FlowControlCheckErrorReasonLabel = "flowcontrol_check_error_reason"
+	FlowControlCheckErrorReasonLabel = "error_reason"
 	// FlowControlCheckRejectReasonLabel - label for reject reason on FCS Check request.
-	FlowControlCheckRejectReasonLabel = "flowcontrol_check_reject_reason"
+	FlowControlCheckRejectReasonLabel = "reject_reason"
 
 	// DEFAULTS.
diff --git a/pkg/otelcollector/metricsprocessor/processor.go b/pkg/otelcollector/metricsprocessor/processor.go
index 4c1bbdf6f6..1146e8e6ce 100644
--- a/pkg/otelcollector/metricsprocessor/processor.go
+++ b/pkg/otelcollector/metricsprocessor/processor.go
@@ -225,19 +225,19 @@ func (p *metricsProcessor) updateMetrics(
 	statusCodeStr := statusCode.StringVal()
 
 	for _, decision := range checkResponse.LimiterDecisions {
+		workload := ""
+		if cl := decision.GetConcurrencyLimiter(); cl != nil {
+			workload = cl.GetWorkloadIndex()
+		}
 		labels := map[string]string{
 			metrics.PolicyNameLabel:     decision.PolicyName,
 			metrics.PolicyHashLabel:     decision.PolicyHash,
 			metrics.ComponentIndexLabel: fmt.Sprintf("%d", decision.ComponentIndex),
 			metrics.DecisionTypeLabel:   checkResponse.DecisionType.String(),
+			metrics.WorkloadIndexLabel:  workload,
 		}
-		log.Trace().Msgf("labels: %v", labels)
-		workload := ""
-		if cl := decision.GetConcurrencyLimiter(); cl != nil {
-			workload = cl.GetWorkloadIndex()
-		}
-		err = p.updateMetricsForWorkload(labels, latency, workload)
+		err = p.updateMetricsForWorkload(labels, latency)
 		if err != nil {
 			return err
 		}
@@ -250,8 +250,7 @@ func (p *metricsProcessor) updateMetrics(
 	return nil
 }
 
-func (p *metricsProcessor) updateMetricsForWorkload(labels map[string]string, latency float64, workload string) error {
-	labels[metrics.WorkloadIndexLabel] = workload
+func (p *metricsProcessor) updateMetricsForWorkload(labels map[string]string, latency float64) error {
 	latencyHistogram, err := p.workloadLatencyHistogram.GetMetricWith(labels)
 	if err != nil {
 		log.Warn().Err(err).Msg("Getting latency histogram")
diff --git a/pkg/policies/dataplane/actuators/concurrency/load-shed-actuator.go b/pkg/policies/dataplane/actuators/concurrency/load-shed-actuator.go
index 502c68c636..f482741cb1 100644
--- a/pkg/policies/dataplane/actuators/concurrency/load-shed-actuator.go
+++ b/pkg/policies/dataplane/actuators/concurrency/load-shed-actuator.go
@@ -117,11 +117,11 @@ func newLoadShedActuatorFactory(
 			errMulti = multierr.Append(errMulti, err)
 		}
 		if !prometheusRegistry.Unregister(f.tokenBucketFillRateGaugeVec) {
-			err := fmt.Errorf("failed to unregister token_bucket_bucket_fill_rate metric")
+			err := fmt.Errorf("failed to unregister token_bucket_fill_rate metric")
 			errMulti = multierr.Append(errMulti, err)
 		}
 		if !prometheusRegistry.Unregister(f.tokenBucketBucketCapacityGaugeVec) {
-			err := fmt.Errorf("failed to unregister token_bucket_bucket_capacity metric")
+			err := fmt.Errorf("failed to unregister token_bucket_capacity metric")
 			errMulti = multierr.Append(errMulti, err)
 		}
 		if !prometheusRegistry.Unregister(f.tokenBucketAvailableTokensGaugeVec) {
@@ -218,11 +218,11 @@ func (lsaFactory *loadShedActuatorFactory) newLoadShedActuator(conLimiter *concu
 			}
 			deleted = lsaFactory.tokenBucketFillRateGaugeVec.Delete(metricLabels)
 			if !deleted {
-				errMulti = multierr.Append(errMulti, errors.New("failed to delete token_bucket_bucket_fill_rate gauge from its metric vector"))
+				errMulti = multierr.Append(errMulti, errors.New("failed to delete token_bucket_fill_rate gauge from its metric vector"))
 			}
 			deleted = lsaFactory.tokenBucketBucketCapacityGaugeVec.Delete(metricLabels)
 			if !deleted {
-				errMulti = multierr.Append(errMulti, errors.New("failed to delete token_bucket_bucket_capacity gauge from its metric vector"))
+				errMulti = multierr.Append(errMulti, errors.New("failed to delete token_bucket_capacity gauge from its metric vector"))
 			}
 			deleted = lsaFactory.tokenBucketAvailableTokensGaugeVec.Delete(metricLabels)
 			if !deleted {
diff --git a/tools/load_generator/scenarios/load_test.js b/tools/load_generator/scenarios/load_test.js
index 431a55518e..e01d2060f6 100644
--- a/tools/load_generator/scenarios/load_test.js
+++ b/tools/load_generator/scenarios/load_test.js
@@ -2,12 +2,12 @@ import http from "k6/http";
 import { check } from "k6";
 
 export let vuStages = [
-  { duration: "30s", target: 5 }, // simulate ramp-up of traffic from 0 to 5 users over 30 seconds
-  { duration: "30s", target: 5 }, // stay at 5 users for 30s minutes
-  { duration: "2m", target: 15 }, // ramp-up to 10 users over 1 minutes
+  { duration: "1s", target: 5 }, // ramp up traffic from 0 to 5 users over 1 second
+  { duration: "2m", target: 5 }, // stay at 5 users for 2 minutes
+  { duration: "1m", target: 15 }, // ramp up to 15 users over 1 minute
   { duration: "2m", target: 15 }, // stay at 10 users for 2 minutes (peak hour)
-  { duration: "10s", target: 5 }, // ramp-down to 5 users in 10 seconds
-  { duration: "2m", target: 5 }, // stay at to 5 users in 30 seconds
+  { duration: "1s", target: 5 }, // ramp down to 5 users over 1 second
+  { duration: "5m", target: 5 }, // stay at 5 users for 5 minutes
 ];
 
 export let options = {