grafana · dimitarvdimitrov · Feb 13, 2024 · Feb 3, 2024 · Feb 3, 2024 · Feb 3, 2024
diff --git a/operations/helm/charts/mimir-distributed/CHANGELOG.md b/operations/helm/charts/mimir-distributed/CHANGELOG.md
@@ -28,6 +28,7 @@ Entries should include a reference to the Pull Request that introduced the chang
 
 ## main / unreleased
 
+* [FEATURE] Added experimental feature for deploying [KEDA](https://keda.sh) ScaledObjects as part of the Helm chart for the components: distributor, querier, query-frontend and ruler. Autoscaling can be enabled via `distributor.kedaAutoscaling`, `ruler.kedaAutoscaling`, `query_frontend.kedaAutoscaling`, and `querier.kedaAutoscaling`. Requires metamonitoring; for more details, see [Monitor the health of your system](https://grafana.com/docs/helm-charts/mimir-distributed/latest/run-production-environment-with-helm/monitor-system-health/). See [grafana/mimir#7367](https://github.com/grafana/mimir/issues/7367) for a migration procedure. #7282
 * [CHANGE] Rollout-operator: remove default CPU limit. #7125
 * [CHANGE] Ring: relaxed the hash ring heartbeat period and timeout for distributor, ingester, store-gateway and compactor: #6860
   * `-distributor.ring.heartbeat-period` set to `1m`

@@ -0,0 +1,52 @@
+# Pin kube version so results are the same for running in CI and locally where the installed kube version may be different.
+kubeVersionOverride: "1.20"
+
+metaMonitoring:
+  grafanaAgent:
+    metrics:
+      enabled: false  # the KEDA read-URL helper only inspects remote.url, not this flag
+      remote:
+        url: https://mimir.example.com/api/v1/push # test with setting a different remote for the monitoring; the ScaledObject serverAddress is built from this URL's scheme and host plus /prometheus
+
+distributor:
+  kedaAutoscaling:
+    enabled: true
+    minReplicaCount: 1
+    maxReplicaCount: 10
+    pollingInterval: 10
+    targetCPUUtilizationPercentage: 80  # CPU trigger threshold = this percentage of the container CPU request
+    targetMemoryUtilizationPercentage: 80  # memory trigger threshold = this percentage of the container memory request
+    customHeaders:  # rendered on each trigger as a comma-separated key=value string
+      X-Scope-OrgID: tenant-1
+
+ruler:
+  kedaAutoscaling:
+    enabled: true
+    minReplicaCount: 1
+    maxReplicaCount: 10
+    pollingInterval: 10
+    targetCPUUtilizationPercentage: 80
+    targetMemoryUtilizationPercentage: 80
+    customHeaders:
+      X-Scope-OrgID: tenant-1
+
+querier:
+  kedaAutoscaling:
+    enabled: true
+    minReplicaCount: 2
+    maxReplicaCount: 10
+    pollingInterval: 10
+    querySchedulerInflightRequestsThreshold: 6  # used as the threshold of both querier triggers
+    customHeaders:
+      X-Scope-OrgID: tenant-1
+
+query_frontend:
+  kedaAutoscaling:
+    enabled: true
+    minReplicaCount: 1
+    maxReplicaCount: 10
+    pollingInterval: 10
+    targetCPUUtilizationPercentage: 80
+    targetMemoryUtilizationPercentage: 80
+    customHeaders:
+      X-Scope-OrgID: tenant-1
@@ -0,0 +1,52 @@
+# Pin kube version so results are the same for running in CI and locally where the installed kube version may be different.
+kubeVersionOverride: "1.20"
+
+metaMonitoring:
+  grafanaAgent:
+    metrics:
+      enabled: false  # with no remote.url set, the ScaledObject serverAddress falls back to the in-cluster gateway URL
+      # Leave the remote empty to use the default to send it to Mimir directly
+      # remote: #
+
+distributor:
+  kedaAutoscaling:
+    enabled: true
+    minReplicaCount: 1
+    maxReplicaCount: 10
+    pollingInterval: 10
+    targetCPUUtilizationPercentage: 80  # CPU trigger threshold = this percentage of the container CPU request
+    targetMemoryUtilizationPercentage: 80  # memory trigger threshold = this percentage of the container memory request
+    customHeaders:  # rendered on each trigger as a comma-separated key=value string
+      X-Scope-OrgID: tenant-1
+
+ruler:
+  kedaAutoscaling:
+    enabled: true
+    minReplicaCount: 1
+    maxReplicaCount: 10
+    pollingInterval: 10
+    targetCPUUtilizationPercentage: 80
+    targetMemoryUtilizationPercentage: 80
+    customHeaders:
+      X-Scope-OrgID: tenant-1
+
+querier:
+  kedaAutoscaling:
+    enabled: true
+    minReplicaCount: 2
+    maxReplicaCount: 10
+    pollingInterval: 10
+    querySchedulerInflightRequestsThreshold: 6  # used as the threshold of both querier triggers
+    customHeaders:
+      X-Scope-OrgID: tenant-1
+
+query_frontend:
+  kedaAutoscaling:
+    enabled: true
+    minReplicaCount: 1
+    maxReplicaCount: 10
+    pollingInterval: 10
+    targetCPUUtilizationPercentage: 80
+    targetMemoryUtilizationPercentage: 80
+    customHeaders:
+      X-Scope-OrgID: tenant-1
@@ -497,6 +497,10 @@ Return if we should create a SecurityContextConstraints. Takes into account user
 {{ include "mimir.gatewayUrl" . }}/api/v1/push
 {{- end -}}
 
+{{- define "mimir.remoteReadUrl.inCluster" -}}{{- /* In-cluster read base URL: the gateway URL followed by the Prometheus HTTP prefix. */ -}}
+{{- printf "%s%s" (include "mimir.gatewayUrl" .) (include "mimir.prometheusHttpPrefix" .) -}}
+{{- end -}}
+
 {{/*
 Creates dict for zone-aware replication configuration
 Params:

@@ -8,10 +8,12 @@ metadata:
     {{- toYaml .Values.distributor.annotations | nindent 4 }}
   namespace: {{ .Release.Namespace | quote }}
 spec:
+  {{- if not .Values.distributor.kedaAutoscaling.enabled }}
   # If replicas is not number (when using values file it's float64, when using --set arg it's int64) and is false (i.e. null) don't set it
   {{- if or (or (kindIs "int64" .Values.distributor.replicas) (kindIs "float64" .Values.distributor.replicas)) (.Values.distributor.replicas) }}
   replicas: {{ .Values.distributor.replicas }}
   {{- end }}
+  {{- end }}
   selector:
     matchLabels:
       {{- include "mimir.selectorLabels" (dict "ctx" . "component" "distributor" "memberlist" true) | nindent 6 }}

@@ -0,0 +1,53 @@
+{{- /*
+KEDA ScaledObject targeting the distributor Deployment. Rendered only when
+distributor.kedaAutoscaling.enabled is set. Two Prometheus triggers are defined,
+one on CPU usage and one on working-set memory; each threshold is the container's
+resource request multiplied by the configured target utilization percentage.
+*/ -}}
+{{- $autoscaling := .Values.distributor.kedaAutoscaling }}
+{{- if $autoscaling.enabled }}
+{{- $resourceName := include "mimir.resourceName" (dict "ctx" . "component" "distributor") }}
+{{- $metricsAddress := include "mimir.metaMonitoring.metrics.remoteReadUrl" (dict "ctx" $) }}
+{{- $cpuRequest := dig "requests" "cpu" nil .Values.distributor.resources }}
+{{- $memoryRequest := dig "requests" "memory" nil .Values.distributor.resources }}
+apiVersion: keda.sh/v1alpha1
+kind: ScaledObject
+metadata:
+  name: {{ $resourceName }}
+  labels:
+    {{- include "mimir.labels" (dict "ctx" . "component" "distributor") | nindent 4 }}
+  annotations:
+    {{- toYaml .Values.distributor.annotations | nindent 4 }}
+  namespace: {{ .Release.Namespace | quote }}
+spec:
+  advanced:
+    horizontalPodAutoscalerConfig:
+      {{- with $autoscaling.behavior }}
+      behavior:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+  maxReplicaCount: {{ $autoscaling.maxReplicaCount }}
+  minReplicaCount: {{ $autoscaling.minReplicaCount }}
+  pollingInterval: {{ $autoscaling.pollingInterval }}
+  scaleTargetRef:
+    name: {{ $resourceName }}
+    apiVersion: apps/v1
+    kind: Deployment
+  triggers:
+  - metadata:
+      query: max_over_time(sum(sum by (pod) (rate(container_cpu_usage_seconds_total{container="distributor",namespace="{{ .Release.Namespace }}"}[5m])) and max by (pod) (up{container="distributor",namespace="{{ .Release.Namespace }}"}) > 0)[15m:]) * 1000
+      serverAddress: {{ $metricsAddress }}
+      threshold: {{ mulf (include "mimir.parseCPU" (dict "value" $cpuRequest)) (divf $autoscaling.targetCPUUtilizationPercentage 100) | floor | int64 | quote }}
+      {{- with $autoscaling.customHeaders }}
+      customHeaders: {{ include "mimir.lib.mapToCSVString" (dict "map" .) | quote }}
+      {{- end }}
+    type: prometheus
+  - metadata:
+      query: max_over_time(sum((sum by (pod) (container_memory_working_set_bytes{container="distributor",namespace="{{ .Release.Namespace }}"}) and max by (pod) (up{container="distributor",namespace="{{ .Release.Namespace }}"}) > 0) or vector(0))[15m:]) + sum(sum by (pod) (max_over_time(kube_pod_container_resource_requests{container="distributor",namespace="{{ .Release.Namespace }}", resource="memory"}[15m])) and max by (pod) (changes(kube_pod_container_status_restarts_total{container="distributor",namespace="{{ .Release.Namespace }}"}[15m]) > 0) and max by (pod) (kube_pod_container_status_last_terminated_reason{container="distributor",namespace="{{ .Release.Namespace }}", reason="OOMKilled"}) or vector(0))
+      serverAddress: {{ $metricsAddress }}
+      threshold: {{ mulf (include "mimir.siToBytes" (dict "value" $memoryRequest)) (divf $autoscaling.targetMemoryUtilizationPercentage 100) | floor | int64 | quote }}
+      {{- with $autoscaling.customHeaders }}
+      customHeaders: {{ include "mimir.lib.mapToCSVString" (dict "map" .) | quote }}
+      {{- end }}
+    type: prometheus
+{{- end }}
@@ -0,0 +1,21 @@
+{{/*
+Convert a map to a comma-separated string of key=value pairs: key1=value1,key2=value2,...
+Pairs are joined with "," only (no space after the comma). Keys are visited in
+sorted order (Go templates range over maps in sorted key order), so the output is stable.
+Non-string values are converted with toString before formatting.
+NOTE(review): keys and values are not escaped; a "," or "=" inside them ends up verbatim in the output.
+Example:
+    customHeaders:
+      X-Scope-OrgID: tenant-1
+becomes:
+    customHeaders: "X-Scope-OrgID=tenant-1"
+Params:
+  map = map to convert to csv string
+*/}}
+{{- define "mimir.lib.mapToCSVString" -}}
+{{- $pairs := list -}}
+{{- range $key, $value := $.map -}}
+{{- $pairs = append $pairs (printf "%s=%s" $key (toString $value)) -}}
+{{- end -}}
+{{ join "," $pairs }}
+{{- end -}}
@@ -56,3 +56,15 @@
     cluster: {{ include "mimir.clusterName" $.ctx | quote}}
 {{- end -}}
 {{- end -}}
+
+{{- define "mimir.metaMonitoring.metrics.remoteReadUrl" -}}
+{{- /* Read URL for metamonitoring metrics: scheme and host of remote.url plus a fixed /prometheus path when set, else the in-cluster URL. */ -}}
+{{- with $.ctx.Values.metaMonitoring.grafanaAgent.metrics -}}
+{{- if (.remote).url -}}
+{{- $target := urlParse (.remote).url -}}
+{{- printf "%s://%s/prometheus" $target.scheme $target.host -}}
+{{- else -}}
+{{- include "mimir.remoteReadUrl.inCluster" $.ctx -}}
+{{- end -}}
+{{- end -}}
+{{- end -}}
@@ -8,10 +8,12 @@ metadata:
     {{- toYaml .Values.querier.annotations | nindent 4 }}
   namespace: {{ .Release.Namespace | quote }}
 spec:
+  {{- if not .Values.querier.kedaAutoscaling.enabled }}
   # If replicas is not number (when using values file it's float64, when using --set arg it's int64) and is false (i.e. null) don't set it
   {{- if or (or (kindIs "int64" .Values.querier.replicas) (kindIs "float64" .Values.querier.replicas)) (.Values.querier.replicas) }}
   replicas: {{ .Values.querier.replicas }}
   {{- end }}
+  {{- end }}
   selector:
     matchLabels:
       {{- include "mimir.selectorLabels" (dict "ctx" . "component" "querier" "memberlist" true) | nindent 6 }}

@@ -0,0 +1,59 @@
+{{- /*
+KEDA ScaledObject targeting the querier Deployment. Rendered only when
+querier.kedaAutoscaling.enabled is set; rendering fails if the query scheduler
+is disabled. Two Prometheus triggers are defined: query-scheduler inflight
+requests and the rate of querier request duration seconds.
+NOTE(review): both triggers use querySchedulerInflightRequestsThreshold as
+their threshold - confirm this is intended for the request-duration trigger.
+*/ -}}
+{{- $autoscaling := .Values.querier.kedaAutoscaling }}
+{{- if $autoscaling.enabled }}
+{{- if not .Values.query_scheduler.enabled }}
+{{- fail "KEDA autoscaling for querier requires query scheduler to be enabled" }}
+{{- end }}
+{{- $resourceName := include "mimir.resourceName" (dict "ctx" . "component" "querier") }}
+{{- $metricsAddress := include "mimir.metaMonitoring.metrics.remoteReadUrl" (dict "ctx" $) }}
+{{- $inflightThreshold := $autoscaling.querySchedulerInflightRequestsThreshold | quote }}
+apiVersion: keda.sh/v1alpha1
+kind: ScaledObject
+metadata:
+  name: {{ $resourceName }}
+  labels:
+    {{- include "mimir.labels" (dict "ctx" . "component" "querier") | nindent 4 }}
+  annotations:
+    {{- toYaml .Values.querier.annotations | nindent 4 }}
+  namespace: {{ .Release.Namespace | quote }}
+spec:
+  advanced:
+    horizontalPodAutoscalerConfig:
+      {{- with $autoscaling.behavior }}
+      behavior:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+  maxReplicaCount: {{ $autoscaling.maxReplicaCount }}
+  minReplicaCount: {{ $autoscaling.minReplicaCount }}
+  pollingInterval: {{ $autoscaling.pollingInterval }}
+  scaleTargetRef:
+    name: {{ $resourceName }}
+    apiVersion: apps/v1
+    kind: Deployment
+  triggers:
+  - metadata:
+      query: sum(max_over_time(cortex_query_scheduler_inflight_requests{container="query-scheduler",namespace="{{ .Release.Namespace }}",quantile="0.5"}[1m]))
+      serverAddress: {{ $metricsAddress }}
+      threshold: {{ $inflightThreshold }}
+      {{- with $autoscaling.customHeaders }}
+      customHeaders: {{ include "mimir.lib.mapToCSVString" (dict "map" .) | quote }}
+      {{- end }}
+    name: cortex_querier_hpa_default
+    type: prometheus
+  - metadata:
+      query: sum(rate(cortex_querier_request_duration_seconds_sum{container="querier",namespace="{{ .Release.Namespace }}"}[1m]))
+      serverAddress: {{ $metricsAddress }}
+      threshold: {{ $inflightThreshold }}
+      {{- with $autoscaling.customHeaders }}
+      customHeaders: {{ include "mimir.lib.mapToCSVString" (dict "map" .) | quote }}
+      {{- end }}
+    name: cortex_querier_hpa_default_requests_duration
+    type: prometheus
+{{- end }}
@@ -8,10 +8,12 @@ metadata:
     {{- toYaml .Values.query_frontend.annotations | nindent 4 }}
   namespace: {{ .Release.Namespace | quote }}
 spec:
+  {{- if not .Values.query_frontend.kedaAutoscaling.enabled }}
   # If replicas is not number (when using values file it's float64, when using --set arg it's int64) and is false (i.e. null) don't set it
   {{- if or (or (kindIs "int64" .Values.query_frontend.replicas) (kindIs "float64" .Values.query_frontend.replicas)) (.Values.query_frontend.replicas) }}
   replicas: {{ .Values.query_frontend.replicas }}
   {{- end }}
+  {{- end }}
   selector:
     matchLabels:
       {{- include "mimir.selectorLabels" (dict "ctx" . "component" "query-frontend") | nindent 6 }}

@@ -0,0 +1,53 @@
+{{- /*
+KEDA ScaledObject targeting the query-frontend Deployment. Rendered only when
+query_frontend.kedaAutoscaling.enabled is set. Two Prometheus triggers are defined,
+one on CPU usage and one on working-set memory; each threshold is the container's
+resource request multiplied by the configured target utilization percentage.
+*/ -}}
+{{- $autoscaling := .Values.query_frontend.kedaAutoscaling }}
+{{- if $autoscaling.enabled }}
+{{- $resourceName := include "mimir.resourceName" (dict "ctx" . "component" "query-frontend") }}
+{{- $metricsAddress := include "mimir.metaMonitoring.metrics.remoteReadUrl" (dict "ctx" $) }}
+{{- $cpuRequest := dig "requests" "cpu" nil .Values.query_frontend.resources }}
+{{- $memoryRequest := dig "requests" "memory" nil .Values.query_frontend.resources }}
+apiVersion: keda.sh/v1alpha1
+kind: ScaledObject
+metadata:
+  name: {{ $resourceName }}
+  labels:
+    {{- include "mimir.labels" (dict "ctx" . "component" "query-frontend") | nindent 4 }}
+  annotations:
+    {{- toYaml .Values.query_frontend.annotations | nindent 4 }}
+  namespace: {{ .Release.Namespace | quote }}
+spec:
+  advanced:
+    horizontalPodAutoscalerConfig:
+      {{- with $autoscaling.behavior }}
+      behavior:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+  maxReplicaCount: {{ $autoscaling.maxReplicaCount }}
+  minReplicaCount: {{ $autoscaling.minReplicaCount }}
+  pollingInterval: {{ $autoscaling.pollingInterval }}
+  scaleTargetRef:
+    name: {{ $resourceName }}
+    apiVersion: apps/v1
+    kind: Deployment
+  triggers:
+  - metadata:
+      query: max_over_time(sum(sum by (pod) (rate(container_cpu_usage_seconds_total{container="query-frontend",namespace="{{ .Release.Namespace }}"}[5m])) and max by (pod) (up{container="query-frontend",namespace="{{ .Release.Namespace }}"}) > 0)[15m:]) * 1000
+      serverAddress: {{ $metricsAddress }}
+      threshold: {{ mulf (include "mimir.parseCPU" (dict "value" $cpuRequest)) (divf $autoscaling.targetCPUUtilizationPercentage 100) | floor | int64 | quote }}
+      {{- with $autoscaling.customHeaders }}
+      customHeaders: {{ include "mimir.lib.mapToCSVString" (dict "map" .) | quote }}
+      {{- end }}
+    type: prometheus
+  - metadata:
+      query: max_over_time(sum((sum by (pod) (container_memory_working_set_bytes{container="query-frontend",namespace="{{ .Release.Namespace }}"}) and max by (pod) (up{container="query-frontend",namespace="{{ .Release.Namespace }}"}) > 0) or vector(0))[15m:]) + sum(sum by (pod) (max_over_time(kube_pod_container_resource_requests{container="query-frontend",namespace="{{ .Release.Namespace }}", resource="memory"}[15m])) and max by (pod) (changes(kube_pod_container_status_restarts_total{container="query-frontend",namespace="{{ .Release.Namespace }}"}[15m]) > 0) and max by (pod) (kube_pod_container_status_last_terminated_reason{container="query-frontend",namespace="{{ .Release.Namespace }}", reason="OOMKilled"}) or vector(0))
+      serverAddress: {{ $metricsAddress }}
+      threshold: {{ mulf (include "mimir.siToBytes" (dict "value" $memoryRequest)) (divf $autoscaling.targetMemoryUtilizationPercentage 100) | floor | int64 | quote }}
+      {{- with $autoscaling.customHeaders }}
+      customHeaders: {{ include "mimir.lib.mapToCSVString" (dict "map" .) | quote }}
+      {{- end }}
+    type: prometheus
+{{- end }}
@@ -9,7 +9,9 @@ metadata:
     {{- toYaml .Values.ruler.annotations | nindent 4 }}
   namespace: {{ .Release.Namespace | quote }}
 spec:
+  {{- if not .Values.ruler.kedaAutoscaling.enabled }}
   replicas: {{ .Values.ruler.replicas }}
+  {{- end }}
   selector:
     matchLabels:
       {{- include "mimir.selectorLabels" (dict "ctx" . "component" "ruler" "memberlist" true) | nindent 6 }}

@@ -0,0 +1,50 @@
+{{- /*
+KEDA ScaledObject targeting the ruler Deployment. Rendered only when
+ruler.kedaAutoscaling.enabled is set. Two Prometheus triggers are defined, one
+on CPU usage and one on working-set memory. Each trigger's metadata carries
+exactly one `query` key (duplicate mapping keys are invalid YAML).
+*/ -}}
+{{- if .Values.ruler.kedaAutoscaling.enabled }}
+apiVersion: keda.sh/v1alpha1
+kind: ScaledObject
+metadata:
+  name: {{ include "mimir.resourceName" (dict "ctx" . "component" "ruler") }}
+  labels:
+    {{- include "mimir.labels" (dict "ctx" . "component" "ruler") | nindent 4 }}
+  annotations:
+    {{- toYaml .Values.ruler.annotations | nindent 4 }}
+  namespace: {{ .Release.Namespace | quote }}
+spec:
+  advanced:
+    horizontalPodAutoscalerConfig:
+      {{- with .Values.ruler.kedaAutoscaling.behavior }}
+      behavior:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+  maxReplicaCount: {{ .Values.ruler.kedaAutoscaling.maxReplicaCount }}
+  minReplicaCount: {{ .Values.ruler.kedaAutoscaling.minReplicaCount }}
+  pollingInterval: {{ .Values.ruler.kedaAutoscaling.pollingInterval }}
+  scaleTargetRef:
+    name: {{ include "mimir.resourceName" (dict "ctx" . "component" "ruler") }}
+    apiVersion: apps/v1
+    kind: Deployment
+  triggers:
+  - metadata:
+      query: max_over_time(sum(sum by (pod) (rate(container_cpu_usage_seconds_total{container="ruler",namespace="{{ .Release.Namespace }}"}[5m])) and max by (pod) (up{container="ruler",namespace="{{ .Release.Namespace }}"}) > 0)[15m:]) * 1000
+      serverAddress: {{ include "mimir.metaMonitoring.metrics.remoteReadUrl" (dict "ctx" $) }}
+      {{- $cpu_request := dig "requests" "cpu" nil .Values.ruler.resources }}
+      threshold: {{ mulf (include "mimir.parseCPU" (dict "value" $cpu_request)) (divf .Values.ruler.kedaAutoscaling.targetCPUUtilizationPercentage 100) | floor | int64 | quote }}
+      {{- if .Values.ruler.kedaAutoscaling.customHeaders }}
+      customHeaders: {{ (include "mimir.lib.mapToCSVString" (dict "map" .Values.ruler.kedaAutoscaling.customHeaders)) | quote }}
+      {{- end }}
+    type: prometheus
+  - metadata:
+      query: max_over_time(sum((sum by (pod) (container_memory_working_set_bytes{container="ruler",namespace="{{ .Release.Namespace }}"}) and max by (pod) (up{container="ruler",namespace="{{ .Release.Namespace }}"}) > 0) or vector(0))[15m:]) + sum(sum by (pod) (max_over_time(kube_pod_container_resource_requests{container="ruler",namespace="{{ .Release.Namespace }}", resource="memory"}[15m])) and max by (pod) (changes(kube_pod_container_status_restarts_total{container="ruler",namespace="{{ .Release.Namespace }}"}[15m]) > 0) and max by (pod) (kube_pod_container_status_last_terminated_reason{container="ruler",namespace="{{ .Release.Namespace }}", reason="OOMKilled"}) or vector(0))
+      serverAddress: {{ include "mimir.metaMonitoring.metrics.remoteReadUrl" (dict "ctx" $) }}
+      {{- $mem_request := dig "requests" "memory" nil .Values.ruler.resources }}
+      threshold: {{ mulf (include "mimir.siToBytes" (dict "value" $mem_request)) (divf .Values.ruler.kedaAutoscaling.targetMemoryUtilizationPercentage 100) | floor | int64 | quote }}
+      {{- if .Values.ruler.kedaAutoscaling.customHeaders }}
+      customHeaders: {{ (include "mimir.lib.mapToCSVString" (dict "map" .Values.ruler.kedaAutoscaling.customHeaders)) | quote }}
+      {{- end }}
+    type: prometheus
+{{- end }}