From 53890e931215116b692e6c38e14f3960de45faef Mon Sep 17 00:00:00 2001 From: Michael Goodness Date: Sat, 21 Jan 2017 15:14:57 -0600 Subject: [PATCH 01/41] Initial commit --- .gitignore | 1 + README.md | 42 + kube-prometheus/.helmignore | 21 + kube-prometheus/Chart.yaml | 9 + .../grafana/all-nodes-dashboard.json | 880 +++++++++++++++++ .../grafana/deployment-dashboard.json | 817 ++++++++++++++++ .../grafana/kubernetes-pods-dashboard.json | 409 ++++++++ kube-prometheus/grafana/node-dashboard.json | 893 ++++++++++++++++++ kube-prometheus/rules/etcd2.rules | 123 +++ kube-prometheus/rules/kubernetes.rules | 387 ++++++++ kube-prometheus/templates/NOTES.txt | 47 + kube-prometheus/templates/_helpers.tpl | 16 + .../templates/alertmanager-configmap.yaml | 25 + .../templates/alertmanager-ingress.yaml | 34 + .../templates/alertmanager-service.yaml | 27 + .../templates/delete-alertmanager-job.yaml | 31 + .../templates/delete-prometheus-job.yaml | 31 + .../templates/grafana-configmap.yaml | 20 + .../templates/grafana-deployment.yaml | 58 ++ .../templates/grafana-ingress.yaml | 33 + .../templates/grafana-service.yaml | 24 + .../templates/kube-apiserver-service.yaml | 21 + .../kube-controller-manager-service.yaml | 21 + .../templates/kube-dns-service.yaml | 25 + .../templates/kube-scheduler-service.yaml | 21 + .../kube-state-metrics-deployment.yaml | 28 + .../templates/kube-state-metrics-service.yaml | 21 + .../templates/node-exporter-daemonset.yaml | 47 + .../templates/node-exporter-service.yaml | 21 + .../templates/prometheus-k8s-configmap.yaml | 93 ++ .../templates/prometheus-k8s-ingress.yaml | 34 + .../templates/prometheus-k8s-rules.yaml | 13 + .../templates/prometheus-k8s-service.yaml | 27 + kube-prometheus/values.yaml | 296 ++++++ prometheus-operator/.helmignore | 21 + prometheus-operator/Chart.yaml | 9 + prometheus-operator/templates/NOTES.txt | 6 + prometheus-operator/templates/_helpers.tpl | 16 + .../templates/delete-tprs-job.yaml | 33 + 
prometheus-operator/templates/deployment.yaml | 29 + .../templates/get-tprs-job.yaml | 33 + prometheus-operator/values.yaml | 31 + 42 files changed, 4774 insertions(+) create mode 100644 .gitignore create mode 100644 README.md create mode 100644 kube-prometheus/.helmignore create mode 100644 kube-prometheus/Chart.yaml create mode 100644 kube-prometheus/grafana/all-nodes-dashboard.json create mode 100644 kube-prometheus/grafana/deployment-dashboard.json create mode 100644 kube-prometheus/grafana/kubernetes-pods-dashboard.json create mode 100644 kube-prometheus/grafana/node-dashboard.json create mode 100644 kube-prometheus/rules/etcd2.rules create mode 100644 kube-prometheus/rules/kubernetes.rules create mode 100644 kube-prometheus/templates/NOTES.txt create mode 100644 kube-prometheus/templates/_helpers.tpl create mode 100644 kube-prometheus/templates/alertmanager-configmap.yaml create mode 100644 kube-prometheus/templates/alertmanager-ingress.yaml create mode 100644 kube-prometheus/templates/alertmanager-service.yaml create mode 100644 kube-prometheus/templates/delete-alertmanager-job.yaml create mode 100644 kube-prometheus/templates/delete-prometheus-job.yaml create mode 100644 kube-prometheus/templates/grafana-configmap.yaml create mode 100644 kube-prometheus/templates/grafana-deployment.yaml create mode 100644 kube-prometheus/templates/grafana-ingress.yaml create mode 100644 kube-prometheus/templates/grafana-service.yaml create mode 100644 kube-prometheus/templates/kube-apiserver-service.yaml create mode 100644 kube-prometheus/templates/kube-controller-manager-service.yaml create mode 100644 kube-prometheus/templates/kube-dns-service.yaml create mode 100644 kube-prometheus/templates/kube-scheduler-service.yaml create mode 100644 kube-prometheus/templates/kube-state-metrics-deployment.yaml create mode 100644 kube-prometheus/templates/kube-state-metrics-service.yaml create mode 100644 kube-prometheus/templates/node-exporter-daemonset.yaml create mode 100644 
kube-prometheus/templates/node-exporter-service.yaml create mode 100644 kube-prometheus/templates/prometheus-k8s-configmap.yaml create mode 100644 kube-prometheus/templates/prometheus-k8s-ingress.yaml create mode 100644 kube-prometheus/templates/prometheus-k8s-rules.yaml create mode 100644 kube-prometheus/templates/prometheus-k8s-service.yaml create mode 100644 kube-prometheus/values.yaml create mode 100644 prometheus-operator/.helmignore create mode 100644 prometheus-operator/Chart.yaml create mode 100644 prometheus-operator/templates/NOTES.txt create mode 100644 prometheus-operator/templates/_helpers.tpl create mode 100644 prometheus-operator/templates/delete-tprs-job.yaml create mode 100644 prometheus-operator/templates/deployment.yaml create mode 100644 prometheus-operator/templates/get-tprs-job.yaml create mode 100644 prometheus-operator/values.yaml diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..aa1ec1ea --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +*.tgz diff --git a/README.md b/README.md new file mode 100644 index 00000000..bf1dee9b --- /dev/null +++ b/README.md @@ -0,0 +1,42 @@ +# kube-prometheus-chart + +[Helm](http://helm.sh/) chart for installing end-to-end [Prometheus](https://prometheus.io/) monitoring of [Kubernetes](https://kubernetes.io/) clusters using the [kube-prometheus](https://github.com/coreos/kube-prometheus) & [prometheus-operator](https://github.com/coreos/prometheus-operator) projects from [CoreOS](https://coreos.com/). + +**As the upstream projects are very much in an alpha state, so is this one. Everything is subject to change.** + +## Components +- [kube-prometheus](https://github.com/coreos/kube-prometheus): main chart +- [prometheus-operator](https://github.com/coreos/prometheus-operator): stand-alone chart, also dependency of `kube-prometheus` + +## Installation +1. 
Package `prometheus-operator` & move to `kube-prometheus/charts`: + ```console + helm package prometheus-operator + mv prometheus-operator-<version>.tgz kube-prometheus/charts + ``` + +2. (Optional) Create a copy of `kube-prometheus/values.yaml` and customize as desired: + ```console + cp kube-prometheus/values.yaml custom-values.yaml + ``` + +3. Package & install `kube-prometheus`: + ```console + helm package kube-prometheus + helm install kube-prometheus-<version>.tgz [-f custom-values.yaml] + ``` + +4. After Helm has installed the chart, execute the command displayed on screen: + ```console + bash -c 'cat < + EOF' + ``` + +## TODO +- [ ] Allow customization of Grafana (access control in particular) +- [ ] Enable etcd discovery & monitoring +- [ ] Enable cleanup of PVCs +- Documentation + - [ ] Cluster/component prerequisites + - [ ] ServiceMonitor configuration diff --git a/kube-prometheus/.helmignore b/kube-prometheus/.helmignore new file mode 100644 index 00000000..f0c13194 --- /dev/null +++ b/kube-prometheus/.helmignore @@ -0,0 +1,21 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*~ +# Various IDEs +.project +.idea/ +*.tmproj diff --git a/kube-prometheus/Chart.yaml b/kube-prometheus/Chart.yaml new file mode 100644 index 00000000..5232ed0e --- /dev/null +++ b/kube-prometheus/Chart.yaml @@ -0,0 +1,9 @@ +apiVersion: v1 +description: Manifests, dashboards, and alerting rules for end-to-end Kubernetes cluster monitoring. 
+engine: gotpl +home: https://github.com/coreos/kube-prometheus +maintainers: + - name: Michael Goodness + email: mgoodness@gmail.com +name: kube-prometheus +version: 0.1.0 diff --git a/kube-prometheus/grafana/all-nodes-dashboard.json b/kube-prometheus/grafana/all-nodes-dashboard.json new file mode 100644 index 00000000..c0bbe251 --- /dev/null +++ b/kube-prometheus/grafana/all-nodes-dashboard.json @@ -0,0 +1,880 @@ +{ + "dashboard": { + "__inputs": [ + { + "name": "DS_PROMETHEUS", + "label": "prometheus", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__requires": [ + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "" + }, + { + "type": "panel", + "id": "singlestat", + "name": "Singlestat", + "version": "" + }, + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "3.1.1" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + } + ], + "id": null, + "title": "All Nodes", + "description": "Dashboard to get an overview of one server", + "tags": [ + "prometheus" + ], + "style": "dark", + "timezone": "browser", + "editable": true, + "hideControls": false, + "sharedCrosshair": false, + "rows": [ + { + "collapse": false, + "editable": true, + "height": "250px", + "panels": [ + { + "alert": { + "crit": { + "op": ">" + }, + "warn": { + "op": ">" + } + }, + "alerting": {}, + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "id": 3, + "isNew": true, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, 
+ "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(node_cpu{mode=\"idle\"}[2m])) * 100", + "hide": false, + "intervalFactor": 10, + "legendFormat": "", + "refId": "A", + "step": 50 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Idle cpu", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "show": true + }, + "yaxes": [ + { + "format": "percent", + "label": "cpu usage", + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "alert": { + "crit": { + "op": ">" + }, + "warn": { + "op": ">" + } + }, + "alerting": {}, + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "id": 9, + "isNew": true, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(node_load1)", + "intervalFactor": 4, + "legendFormat": "load 1m", + "refId": "A", + "step": 20, + "target": "" + }, + { + "expr": "sum(node_load5)", + "intervalFactor": 4, + "legendFormat": "load 5m", + "refId": "B", + "step": 20, + "target": "" + }, + { + "expr": "sum(node_load15)", + "intervalFactor": 4, + "legendFormat": "load 15m", + "refId": "C", + "step": 20, 
+ "target": "" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "System load", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "show": true + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "title": "New row" + }, + { + "collapse": false, + "editable": true, + "height": "250px", + "panels": [ + { + "alert": { + "crit": { + "op": ">" + }, + "warn": { + "op": ">" + } + }, + "alerting": {}, + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "id": 4, + "isNew": true, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "node_memory_SwapFree{instance=\"172.17.0.1:9100\",job=\"prometheus\"}", + "yaxis": 2 + } + ], + "span": 9, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(node_memory_MemTotal) - sum(node_memory_MemFree)", + "intervalFactor": 2, + "legendFormat": "free memory", + "metric": "memo", + "refId": "A", + "step": 4, + "target": "" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Free memory", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "show": true + }, + "yaxes": [ + { + 
"format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 5, + "interval": null, + "isNew": true, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 3, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "expr": "(sum(node_memory_MemFree) / sum(node_memory_MemTotal)) * 100", + "intervalFactor": 2, + "refId": "A", + "step": 60, + "target": "" + } + ], + "thresholds": "10, 20", + "title": "Free memory", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + } + ], + "title": "New row" + }, + { + "collapse": false, + "editable": true, + "height": "250px", + "panels": [ + { + "alert": { + "crit": { + "op": ">" + }, + "warn": { + "op": ">" + } + }, + "alerting": {}, + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 
27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "id": 6, + "isNew": true, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "read", + "yaxis": 1 + }, + { + "alias": "{instance=\"172.17.0.1:9100\"}", + "yaxis": 2 + }, + { + "alias": "io time", + "yaxis": 2 + } + ], + "span": 9, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(node_disk_bytes_read[5m]))", + "hide": false, + "intervalFactor": 4, + "legendFormat": "read", + "refId": "A", + "step": 8, + "target": "" + }, + { + "expr": "sum(irate(node_disk_bytes_written[5m]))", + "intervalFactor": 4, + "legendFormat": "written", + "refId": "B", + "step": 8 + }, + { + "expr": "sum(irate(node_disk_io_time_ms[5m]))", + "intervalFactor": 4, + "legendFormat": "io time", + "refId": "C", + "step": 8 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Disk usage", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "show": true + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "format": "percentunit", + "gauge": { + "maxValue": 1, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": 
true + }, + "id": 7, + "interval": null, + "isNew": true, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 3, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "expr": "sum(node_filesystem_free{device!=\"rootfs\"}) / sum(node_filesystem_size{device!=\"rootfs\"})", + "intervalFactor": 2, + "refId": "A", + "step": 60, + "target": "" + } + ], + "thresholds": "0.10, 0.25", + "title": "Free disk space", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + } + ], + "title": "New row" + }, + { + "collapse": false, + "editable": true, + "height": "250px", + "panels": [ + { + "alert": { + "crit": { + "op": ">" + }, + "warn": { + "op": ">" + } + }, + "alerting": {}, + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "id": 8, + "isNew": true, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "transmitted ", + "yaxis": 2 + } + ], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + 
{ + "expr": "sum(irate(node_network_receive_bytes{device!~\"lo\"}[5m]))", + "hide": false, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10, + "target": "" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Network received", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "show": true + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "alert": { + "crit": { + "op": ">" + }, + "warn": { + "op": ">" + } + }, + "alerting": {}, + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "id": 10, + "isNew": true, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "transmitted ", + "yaxis": 2 + } + ], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(node_network_transmit_bytes{device!~\"lo\"}[5m]))", + "hide": false, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10, + "target": "" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Network transmitted", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "show": true + }, + "yaxes": [ + { 
+ "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "title": "New row" + } + ], + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "templating": { + "list": [] + }, + "annotations": { + "list": [] + }, + "refresh": false, + "schemaVersion": 12, + "version": 10, + "links": [], + "gnetId": 22 +}, + "inputs": [ + { + "name": "DS_PROMETHEUS", + "pluginId": "prometheus", + "type": "datasource", + "value": "prometheus" + } + ], + "overwrite": true +} diff --git a/kube-prometheus/grafana/deployment-dashboard.json b/kube-prometheus/grafana/deployment-dashboard.json new file mode 100644 index 00000000..db6b240c --- /dev/null +++ b/kube-prometheus/grafana/deployment-dashboard.json @@ -0,0 +1,817 @@ +{ + "dashboard": { + "__inputs": [ + { + "name": "DS_PROMETHEUS", + "label": "prometheus", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__requires": [ + { + "type": "panel", + "id": "singlestat", + "name": "Singlestat", + "version": "" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "" + }, + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "3.1.1" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + } + ], + "id": null, + "title": "Deployment", + "tags": [], + "style": "dark", + "timezone": "browser", + "editable": true, + "hideControls": false, + "sharedCrosshair": true, + "rows": [ + { + "collapse": false, + "editable": true, + "height": "200px", + "panels": [ + { + "title": "CPU", + "error": false, + 
"span": 4, + "editable": true, + "type": "singlestat", + "isNew": true, + "id": 8, + "targets": [ + { + "refId": "A", + "expr": "sum(rate(container_cpu_usage_seconds_total{namespace=\"$deployment_namespace\",pod_name=~\"$deployment_name.*\"}[3m])) ", + "intervalFactor": 2, + "step": 600 + } + ], + "links": [], + "datasource": "${DS_PROMETHEUS}", + "maxDataPoints": 100, + "interval": null, + "cacheTimeout": null, + "format": "none", + "prefix": "", + "postfix": "cores", + "nullText": null, + "valueMaps": [ + { + "value": "null", + "op": "=", + "text": "N/A" + } + ], + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "rangeMaps": [ + { + "from": "null", + "to": "null", + "text": "N/A" + } + ], + "mappingType": 1, + "nullPointMode": "connected", + "valueName": "avg", + "prefixFontSize": "50%", + "valueFontSize": "110%", + "postfixFontSize": "50%", + "thresholds": "", + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "sparkline": { + "show": true, + "full": false, + "lineColor": "rgb(31, 120, 193)", + "fillColor": "rgba(31, 118, 189, 0.18)" + }, + "gauge": { + "show": false, + "minValue": 0, + "maxValue": 100, + "thresholdMarkers": true, + "thresholdLabels": false + } + }, + { + "title": "Memory", + "error": false, + "span": 4, + "editable": true, + "type": "singlestat", + "isNew": true, + "id": 9, + "targets": [ + { + "refId": "A", + "expr": "sum(container_memory_usage_bytes{namespace=\"$deployment_namespace\",pod_name=~\"$deployment_name.*\"}) / 1024^3", + "intervalFactor": 2, + "step": 600 + } + ], + "links": [], + "datasource": "${DS_PROMETHEUS}", + "maxDataPoints": 100, + "interval": null, + "cacheTimeout": null, + "format": "none", + "prefix": "", + "postfix": "GB", + "nullText": null, + "valueMaps": [ + { + "value": "null", + "op": "=", + "text": "N/A" + } + ], + "mappingTypes": [ + { 
+ "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "rangeMaps": [ + { + "from": "null", + "to": "null", + "text": "N/A" + } + ], + "mappingType": 1, + "nullPointMode": "connected", + "valueName": "avg", + "prefixFontSize": "80%", + "valueFontSize": "110%", + "postfixFontSize": "50%", + "thresholds": "", + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "sparkline": { + "show": true, + "full": false, + "lineColor": "rgb(31, 120, 193)", + "fillColor": "rgba(31, 118, 189, 0.18)" + }, + "gauge": { + "show": false, + "minValue": 0, + "maxValue": 100, + "thresholdMarkers": true, + "thresholdLabels": false + } + }, + { + "title": "Network", + "error": false, + "span": 4, + "editable": true, + "type": "singlestat", + "isNew": true, + "id": 7, + "targets": [ + { + "refId": "A", + "expr": "sum(rate(container_network_transmit_bytes_total{namespace=\"$deployment_namespace\",pod_name=~\"$deployment_name.*\"}[3m])) + sum(rate(container_network_receive_bytes_total{namespace=\"$deployment_namespace\",pod_name=~\"$deployment_name.*\"}[3m])) ", + "intervalFactor": 2, + "step": 600 + } + ], + "links": [], + "datasource": "${DS_PROMETHEUS}", + "maxDataPoints": 100, + "interval": null, + "cacheTimeout": null, + "format": "Bps", + "prefix": "", + "postfix": "", + "nullText": null, + "valueMaps": [ + { + "value": "null", + "op": "=", + "text": "N/A" + } + ], + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "rangeMaps": [ + { + "from": "null", + "to": "null", + "text": "N/A" + } + ], + "mappingType": 1, + "nullPointMode": "connected", + "valueName": "avg", + "prefixFontSize": "50%", + "valueFontSize": "80%", + "postfixFontSize": "50%", + "thresholds": "", + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 
129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "sparkline": { + "show": true, + "full": false, + "lineColor": "rgb(31, 120, 193)", + "fillColor": "rgba(31, 118, 189, 0.18)" + }, + "gauge": { + "show": false, + "minValue": 0, + "maxValue": 100, + "thresholdMarkers": false, + "thresholdLabels": false + } + } + ], + "title": "Row", + "showTitle": false + }, + { + "title": "New row", + "height": "100px", + "editable": true, + "collapse": false, + "panels": [ + { + "title": "Desired Replicas", + "error": false, + "span": 3, + "editable": true, + "type": "singlestat", + "isNew": true, + "id": 5, + "targets": [ + { + "refId": "A", + "expr": "kube_deployment_spec_replicas{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}", + "intervalFactor": 2, + "step": 600, + "metric": "kube_deployment_spec_replicas" + } + ], + "links": [], + "datasource": "${DS_PROMETHEUS}", + "maxDataPoints": 100, + "interval": null, + "cacheTimeout": null, + "format": "none", + "prefix": "", + "postfix": "", + "nullText": null, + "valueMaps": [ + { + "value": "null", + "op": "=", + "text": "N/A" + } + ], + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "rangeMaps": [ + { + "from": "null", + "to": "null", + "text": "N/A" + } + ], + "mappingType": 1, + "nullPointMode": "connected", + "valueName": "avg", + "prefixFontSize": "50%", + "valueFontSize": "80%", + "postfixFontSize": "50%", + "thresholds": "", + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "sparkline": { + "show": false, + "full": false, + "lineColor": "rgb(31, 120, 193)", + "fillColor": "rgba(31, 118, 189, 0.18)" + }, + "gauge": { + "show": false, + "minValue": 0, + "maxValue": 100, + "thresholdMarkers": false, + "thresholdLabels": false + }, + "decimals": null + }, + { + "title": "Available Replicas", + "error": false, + "span": 3, + 
"editable": true, + "type": "singlestat", + "isNew": true, + "id": 6, + "targets": [ + { + "refId": "A", + "expr": "kube_deployment_status_replicas_available{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}", + "intervalFactor": 2, + "step": 600 + } + ], + "links": [], + "datasource": "${DS_PROMETHEUS}", + "maxDataPoints": 100, + "interval": null, + "cacheTimeout": null, + "format": "none", + "prefix": "", + "postfix": "", + "nullText": null, + "valueMaps": [ + { + "value": "null", + "op": "=", + "text": "N/A" + } + ], + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "rangeMaps": [ + { + "from": "null", + "to": "null", + "text": "N/A" + } + ], + "mappingType": 1, + "nullPointMode": "connected", + "valueName": "avg", + "prefixFontSize": "50%", + "valueFontSize": "80%", + "postfixFontSize": "50%", + "thresholds": "", + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "sparkline": { + "show": false, + "full": false, + "lineColor": "rgb(31, 120, 193)", + "fillColor": "rgba(31, 118, 189, 0.18)" + }, + "gauge": { + "show": false, + "minValue": 0, + "maxValue": 100, + "thresholdMarkers": true, + "thresholdLabels": false + } + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 3, + "interval": null, + "isNew": true, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": 
"connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 3, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "expr": "kube_deployment_status_observed_generation{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 600 + } + ], + "thresholds": "", + "title": "Observed Generation", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 2, + "interval": null, + "isNew": true, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 3, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "expr": "kube_deployment_metadata_generation{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 600 + } 
+ ], + "thresholds": "", + "title": "Metadata Generation", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + } + ] + }, + { + "collapse": false, + "editable": true, + "height": "350px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "id": 1, + "isNew": true, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false, + "hideZero": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "kube_deployment_status_replicas{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}", + "intervalFactor": 2, + "legendFormat": "current replicas", + "refId": "A", + "step": 30 + }, + { + "expr": "kube_deployment_status_replicas_available{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}", + "intervalFactor": 2, + "legendFormat": "available", + "refId": "B", + "step": 30 + }, + { + "expr": "kube_deployment_status_replicas_unavailable{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}", + "intervalFactor": 2, + "legendFormat": "unavailable", + "refId": "C", + "step": 30 + }, + { + "expr": "kube_deployment_status_replicas_updated{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}", + "intervalFactor": 2, + "legendFormat": "updated", + "refId": "D", + "step": 30 + }, + { + "expr": 
"kube_deployment_spec_replicas{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}", + "intervalFactor": 2, + "legendFormat": "desired", + "refId": "E", + "step": 30 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Replicas", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "transparent": false + } + ], + "title": "New row", + "showTitle": false + } + ], + "time": { + "from": "now-6h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "templating": { + "list": [ + { + "allValue": ".*", + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "hide": 0, + "includeAll": false, + "label": "Namespace", + "multi": false, + "name": "deployment_namespace", + "options": [], + "query": "label_values(kube_deployment_metadata_generation, namespace)", + "refresh": 1, + "regex": "", + "sort": 0, + "tagValuesQuery": null, + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "hide": 0, + "includeAll": false, + "label": "Deployment", + "multi": false, + "name": "deployment_name", + "options": [], + "query": "label_values(kube_deployment_metadata_generation{namespace=\"$deployment_namespace\"}, deployment)", + "refresh": 1, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tagsQuery": "deployment", + "type": "query", + "useTags": false + } + ] + }, + 
"annotations": { + "list": [] + }, + "schemaVersion": 12, + "version": 2, + "links": [], + "gnetId": null +}, + "inputs": [ + { + "name": "DS_PROMETHEUS", + "pluginId": "prometheus", + "type": "datasource", + "value": "prometheus" + } + ], + "overwrite": true +} diff --git a/kube-prometheus/grafana/kubernetes-pods-dashboard.json b/kube-prometheus/grafana/kubernetes-pods-dashboard.json new file mode 100644 index 00000000..035da015 --- /dev/null +++ b/kube-prometheus/grafana/kubernetes-pods-dashboard.json @@ -0,0 +1,409 @@ +{ + "dashboard": { + "__inputs": [ + { + "description": "", + "label": "prometheus", + "name": "DS_PROMETHEUS", + "pluginId": "prometheus", + "pluginName": "Prometheus", + "type": "datasource" + } + ], + "__requires": [ + { + "id": "graph", + "name": "Graph", + "type": "panel", + "version": "" + }, + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "3.1.1" + }, + { + "id": "prometheus", + "name": "Prometheus", + "type": "datasource", + "version": "1.0.0" + } + ], + "annotations": { + "list": [] + }, + "editable": true, + "gnetId": null, + "hideControls": false, + "id": null, + "links": [], + "rows": [ + { + "collapse": false, + "editable": true, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "id": 1, + "isNew": true, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 12, + "stack": false, + "steppedLine": false, + 
"targets": [ + { + "expr": "sum by(container_name) (container_memory_usage_bytes{pod_name=\"$pod\", container_name=~\"$container\", container_name!=\"POD\"})", + "interval": "10s", + "intervalFactor": 1, + "legendFormat": "Current: {{ container_name }}", + "metric": "container_memory_usage_bytes", + "refId": "A", + "step": 10 + }, + { + "expr": "kube_pod_container_requested_memory_bytes{pod=\"$pod\", container=~\"$container\"}", + "interval": "10s", + "intervalFactor": 2, + "legendFormat": "Requested: {{ container }}", + "metric": "kube_pod_container_requested_memory_bytes", + "refId": "B", + "step": 20 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory Usage", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "show": true + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "title": "Row" + }, + { + "collapse": false, + "editable": true, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "id": 2, + "isNew": true, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (container_name)( 
rate(container_cpu_usage_seconds_total{image!=\"\",container_name!=\"POD\",pod_name=\"$pod\"}[1m] ) )", + "intervalFactor": 2, + "legendFormat": "{{ container_name }}", + "refId": "A", + "step": 30 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Usage", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "show": true + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "title": "New row" + }, + { + "collapse": false, + "editable": true, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "id": 3, + "isNew": true, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum by (pod_name) (rate (container_network_receive_bytes_total{pod_name=\"$pod\"}[1m]) ))", + "intervalFactor": 2, + "legendFormat": "{{ pod_name }}", + "refId": "A", + "step": 30 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Network I/O", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "show": true + }, + "yaxes": [ + { + "format": 
"bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "title": "New row" + } + ], + "schemaVersion": 12, + "sharedCrosshair": true, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "allValue": ".*", + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "hide": 0, + "includeAll": true, + "label": "Namespace", + "multi": false, + "name": "namespace", + "options": [], + "query": "label_values(kube_pod_info, namespace)", + "refresh": 1, + "regex": "", + "type": "query" + }, + { + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "hide": 0, + "includeAll": false, + "label": "Pod", + "multi": false, + "name": "pod", + "options": [], + "query": "label_values(kube_pod_info{namespace=~\"$namespace\"}, pod)", + "refresh": 1, + "regex": "", + "type": "query" + }, + { + "allValue": ".*", + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "hide": 0, + "includeAll": true, + "label": "Container", + "multi": false, + "name": "container", + "options": [], + "query": "label_values(kube_pod_container_info{namespace=\"$namespace\", pod=\"$pod\"}, container)", + "refresh": 1, + "regex": "", + "type": "query" + } + ] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Pods", + "version": 26 + }, + "inputs": [ + { + "name": "DS_PROMETHEUS", + "pluginId": "prometheus", + "type": "datasource", + "value": "prometheus" + } + ], + "overwrite": true +} diff --git a/kube-prometheus/grafana/node-dashboard.json b/kube-prometheus/grafana/node-dashboard.json new file mode 100644 index 00000000..6f90c9fd --- /dev/null +++ 
b/kube-prometheus/grafana/node-dashboard.json @@ -0,0 +1,893 @@ +{ + "dashboard": { + "__inputs": [ + { + "name": "DS_PROMETHEUS", + "label": "prometheus", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__requires": [ + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "" + }, + { + "type": "panel", + "id": "singlestat", + "name": "Singlestat", + "version": "" + }, + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "3.1.1" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + } + ], + "id": null, + "title": "Nodes", + "description": "Dashboard to get an overview of one server", + "tags": [ + "prometheus" + ], + "style": "dark", + "timezone": "browser", + "editable": true, + "hideControls": false, + "sharedCrosshair": false, + "rows": [ + { + "collapse": false, + "editable": true, + "height": "250px", + "panels": [ + { + "alert": { + "crit": { + "op": ">" + }, + "warn": { + "op": ">" + } + }, + "alerting": {}, + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 1, + "grid": { + "threshold1": null, + "threshold2": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "id": 3, + "isNew": true, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "100 - (avg by (cpu) (irate(node_cpu{mode=\"idle\", instance=~\"$server\"}[5m])) * 100)", + "hide": false, + "intervalFactor": 10, + "legendFormat": "{{cpu}}", + "refId": "A", + "step": 50 
+ } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Idle cpu", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "show": true + }, + "yaxes": [ + { + "format": "percent", + "label": "cpu usage", + "logBase": 1, + "max": 100, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "alert": { + "crit": { + "op": ">" + }, + "warn": { + "op": ">" + } + }, + "alerting": {}, + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 1, + "grid": { + "threshold1": null, + "threshold2": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "id": 9, + "isNew": true, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_load1{instance=~\"$server\"}", + "intervalFactor": 4, + "legendFormat": "load 1m", + "refId": "A", + "step": 20, + "target": "" + }, + { + "expr": "node_load5{instance=~\"$server\"}", + "intervalFactor": 4, + "legendFormat": "load 5m", + "refId": "B", + "step": 20, + "target": "" + }, + { + "expr": "node_load15{instance=~\"$server\"}", + "intervalFactor": 4, + "legendFormat": "load 15m", + "refId": "C", + "step": 20, + "target": "" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "System load", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "show": 
true + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "title": "New row" + }, + { + "collapse": false, + "editable": true, + "height": "250px", + "panels": [ + { + "alert": { + "crit": { + "op": ">" + }, + "warn": { + "op": ">" + } + }, + "alerting": {}, + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 1, + "grid": { + "threshold1": null, + "threshold2": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "id": 4, + "isNew": true, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "node_memory_SwapFree{instance=\"172.17.0.1:9100\",job=\"prometheus\"}", + "yaxis": 2 + } + ], + "span": 9, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_memory_MemTotal{instance=~\"$server\"} - node_memory_MemFree{instance=~\"$server\"}", + "intervalFactor": 2, + "legendFormat": "free memory", + "metric": "memo", + "refId": "A", + "step": 4, + "target": "" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Free memory", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "show": true + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { 
+ "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 5, + "interval": null, + "isNew": true, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 3, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "expr": "(node_memory_MemFree{instance=~\"$server\"} / node_memory_MemTotal{instance=~\"$server\"}) * 100", + "intervalFactor": 2, + "refId": "A", + "step": 60, + "target": "" + } + ], + "thresholds": "10, 20", + "title": "Free memory", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + } + ], + "title": "New row" + }, + { + "collapse": false, + "editable": true, + "height": "250px", + "panels": [ + { + "alert": { + "crit": { + "op": ">" + }, + "warn": { + "op": ">" + } + }, + "alerting": {}, + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 1, + "grid": { + "threshold1": null, + "threshold2": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "id": 6, + "isNew": true, + "legend": { + "avg": false, + "current": false, + "max": false, + 
"min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "read", + "yaxis": 1 + }, + { + "alias": "{instance=\"172.17.0.1:9100\"}", + "yaxis": 2 + }, + { + "alias": "io time", + "yaxis": 2 + } + ], + "span": 9, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (instance) (irate(node_disk_bytes_read{instance=~\"$server\"}[5m]))", + "hide": false, + "intervalFactor": 4, + "legendFormat": "read", + "refId": "A", + "step": 8, + "target": "" + }, + { + "expr": "sum by (instance) (irate(node_disk_bytes_written{instance=~\"$server\"}[5m]))", + "intervalFactor": 4, + "legendFormat": "written", + "refId": "B", + "step": 8 + }, + { + "expr": "sum by (instance) (irate(node_disk_io_time_ms{instance=~\"$server\"}[5m]))", + "intervalFactor": 4, + "legendFormat": "io time", + "refId": "C", + "step": 8 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Disk usage", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "show": true + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "format": "percentunit", + "gauge": { + "maxValue": 1, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 7, + "interval": null, + "isNew": true, + "links": 
[], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 3, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "expr": "sum(node_filesystem_free{device!=\"rootfs\",instance=~\"$server\"}) / sum(node_filesystem_size{device!=\"rootfs\",instance=~\"$server\"})", + "intervalFactor": 2, + "refId": "A", + "step": 60, + "target": "" + } + ], + "thresholds": "0.10, 0.25", + "title": "Free disk space", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + } + ], + "title": "New row" + }, + { + "collapse": false, + "editable": true, + "height": "250px", + "panels": [ + { + "alert": { + "crit": { + "op": ">" + }, + "warn": { + "op": ">" + } + }, + "alerting": {}, + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 1, + "grid": { + "threshold1": null, + "threshold2": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "id": 8, + "isNew": true, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "transmitted ", + "yaxis": 2 + } + ], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": 
"irate(node_network_receive_bytes{instance=~\"$server\",device!~\"lo\"}[5m])", + "hide": false, + "intervalFactor": 2, + "legendFormat": "{{device}}", + "refId": "A", + "step": 10, + "target": "" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Network received", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "show": true + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "alert": { + "crit": { + "op": ">" + }, + "warn": { + "op": ">" + } + }, + "alerting": {}, + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 1, + "grid": { + "threshold1": null, + "threshold2": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "id": 10, + "isNew": true, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "transmitted ", + "yaxis": 2 + } + ], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(node_network_transmit_bytes{instance=~\"$server\",device!~\"lo\"}[5m])", + "hide": false, + "intervalFactor": 2, + "legendFormat": "{{device}}", + "refId": "B", + "step": 10, + "target": "" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Network transmitted", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + 
"xaxis": { + "show": true + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "title": "New row" + } + ], + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "templating": { + "list": [ + { + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "hide": 0, + "includeAll": false, + "multi": false, + "name": "server", + "options": [], + "query": "label_values(node_boot_time, instance)", + "refresh": 1, + "type": "query" + } + ] + }, + "annotations": { + "list": [] + }, + "refresh": false, + "schemaVersion": 12, + "version": 1, + "links": [], + "gnetId": 22 +}, + "inputs": [ + { + "name": "DS_PROMETHEUS", + "pluginId": "prometheus", + "type": "datasource", + "value": "prometheus" + } + ], + "overwrite": true +} diff --git a/kube-prometheus/rules/etcd2.rules b/kube-prometheus/rules/etcd2.rules new file mode 100644 index 00000000..98f90005 --- /dev/null +++ b/kube-prometheus/rules/etcd2.rules @@ -0,0 +1,123 @@ +### General cluster availability ### + +# alert if another failed peer will result in an unavailable cluster +ALERT InsufficientPeers + IF count(up{job="etcd-k8s"} == 0) > (count(up{job="etcd-k8s"}) / 2 - 1) + FOR 3m + LABELS { + severity = "critical" + } + ANNOTATIONS { + summary = "Etcd cluster small", + description = "If one more etcd peer goes down the cluster will be unavailable", + } + +### HTTP requests alerts ### + +# alert if more than 1% of requests to an HTTP endpoint have failed with a non 4xx response +ALERT HighNumberOfFailedHTTPRequests + IF sum by(method) (rate(etcd_http_failed_total{job="etcd-k8s", code!~"4[0-9]{2}"}[5m])) 
/ sum by(method) (rate(etcd_http_received_total{job="etcd-k8s"}[5m])) > 0.01 + FOR 10m + LABELS { + severity = "warning" + } + ANNOTATIONS { + summary = "a high number of HTTP requests are failing", + description = "{{ $value }}% of requests for {{ $labels.method }} failed on etcd instance {{ $labels.instance }}", + } + +# alert if more than 5% of requests to an HTTP endpoint have failed with a non 4xx response +ALERT HighNumberOfFailedHTTPRequests + IF sum by(method) (rate(etcd_http_failed_total{job="etcd-k8s", code!~"4[0-9]{2}"}[5m])) / sum by(method) (rate(etcd_http_received_total{job="etcd-k8s"}[5m])) > 0.05 + FOR 5m + LABELS { + severity = "critical" + } + ANNOTATIONS + { + summary = "a high number of HTTP requests are failing", + description = "{{ $value }}% of requests for {{ $labels.method }} failed on etcd instance {{ $labels.instance }}", + } + +# alert if 50% of requests get a 4xx response +ALERT HighNumberOfFailedHTTPRequests + IF sum by(method) (rate(etcd_http_failed_total{job="etcd-k8s", code=~"4[0-9]{2}"}[5m])) / sum by(method) (rate(etcd_http_received_total{job="etcd-k8s"}[5m])) > 0.5 + FOR 10m + LABELS { + severity = "critical" + } + ANNOTATIONS + { + summary = "a high number of HTTP requests are failing", + description = "{{ $value }}% of requests for {{ $labels.method }} failed with 4xx responses on etcd instance {{ $labels.instance }}", + } + +# alert if the 99th percentile of HTTP requests take more than 150ms +ALERT HTTPRequestsSlow + IF histogram_quantile(0.99, rate(etcd_http_successful_duration_second_bucket[5m])) > 0.15 + FOR 10m + LABELS + { + severity = "warning" + } + ANNOTATIONS { + summary = "slow HTTP requests", + description = "on etcd instance {{ $labels.instance }} HTTP requests to {{ $labels.method }} are slow", + } + +### File descriptor alerts + +instance:fd_utilization = process_open_fds / process_max_fds + +# alert if file descriptors are likely to exhaust within the next 4 hours +ALERT FdExhaustionClose + IF 
predict_linear(instance:fd_utilization[1h], 3600 * 4) > 1 + FOR 10m + LABELS + { + severity = "warning" + } + ANNOTATIONS { + summary = "file descriptors soon exhausted", + description = "{{ $labels.job }} instance {{ $labels.instance }} will exhaust its file descriptors soon", + } + +# alert if file descriptors are likely to exhaust within the next hour +ALERT FdExhaustionClose + IF predict_linear(instance:fd_utilization[10m], 3600) > 1 + FOR 10m + LABELS { + severity = "critical" + } + ANNOTATIONS + { + summary = "file descriptors soon exhausted", + description = "{{ $labels.job }} instance {{ $labels.instance }} will exhaust its file descriptors soon", + } + +### etcd proposal alerts ### + +# alert if there are several failed proposals within an hour +ALERT HighNumberOfFailedProposals + IF increase(etcd_server_proposal_failed_total{job="etcd"}[1h]) > 5 + LABELS { + severity = "warning" + } + ANNOTATIONS { + summary = "a high number of failed proposals within the etcd cluster are happening", + description = "etcd instance {{ $labels.instance }} has seen {{ $value }} proposal failures within the last hour", + } + +### etcd disk io latency alerts + +# alert if 99th percentile of fsync durations is higher than 500ms +ALERT HighFsyncDurations + IF histogram_quantile(0.99, rate(etcd_wal_fsync_durations_seconds_bucket[5m])) > 0.5 + FOR 10m + LABELS { + severity = "warning" + } + ANNOTATIONS { + summary = "high fsync durations", + description = "etcd instance {{ $labels.instance }} fsync durations are high", + } diff --git a/kube-prometheus/rules/kubernetes.rules b/kube-prometheus/rules/kubernetes.rules new file mode 100644 index 00000000..6cbf8a9c --- /dev/null +++ b/kube-prometheus/rules/kubernetes.rules @@ -0,0 +1,387 @@ +# NOTE: These rules were kindly contributed by the SoundCloud engineering team. 
+ +### Container resources ### + +cluster_namespace_controller_pod_container:spec_memory_limit_bytes = + sum by (cluster,namespace,controller,pod_name,container_name) ( + label_replace( + container_spec_memory_limit_bytes{container_name!=""}, + "controller", "$1", + "pod_name", "^(.*)-[a-z0-9]+" + ) + ) + +cluster_namespace_controller_pod_container:spec_cpu_shares = + sum by (cluster,namespace,controller,pod_name,container_name) ( + label_replace( + container_spec_cpu_shares{container_name!=""}, + "controller", "$1", + "pod_name", "^(.*)-[a-z0-9]+" + ) + ) + +cluster_namespace_controller_pod_container:cpu_usage:rate = + sum by (cluster,namespace,controller,pod_name,container_name) ( + label_replace( + irate( + container_cpu_usage_seconds_total{container_name!=""}[5m] + ), + "controller", "$1", + "pod_name", "^(.*)-[a-z0-9]+" + ) + ) + +cluster_namespace_controller_pod_container:memory_usage:bytes = + sum by (cluster,namespace,controller,pod_name,container_name) ( + label_replace( + container_memory_usage_bytes{container_name!=""}, + "controller", "$1", + "pod_name", "^(.*)-[a-z0-9]+" + ) + ) + +cluster_namespace_controller_pod_container:memory_working_set:bytes = + sum by (cluster,namespace,controller,pod_name,container_name) ( + label_replace( + container_memory_working_set_bytes{container_name!=""}, + "controller", "$1", + "pod_name", "^(.*)-[a-z0-9]+" + ) + ) + +cluster_namespace_controller_pod_container:memory_rss:bytes = + sum by (cluster,namespace,controller,pod_name,container_name) ( + label_replace( + container_memory_rss{container_name!=""}, + "controller", "$1", + "pod_name", "^(.*)-[a-z0-9]+" + ) + ) + +cluster_namespace_controller_pod_container:memory_cache:bytes = + sum by (cluster,namespace,controller,pod_name,container_name) ( + label_replace( + container_memory_cache{container_name!=""}, + "controller", "$1", + "pod_name", "^(.*)-[a-z0-9]+" + ) + ) + +cluster_namespace_controller_pod_container:disk_usage:bytes = + sum by 
(cluster,namespace,controller,pod_name,container_name) ( + label_replace( + container_disk_usage_bytes{container_name!=""}, + "controller", "$1", + "pod_name", "^(.*)-[a-z0-9]+" + ) + ) + +cluster_namespace_controller_pod_container:memory_pagefaults:rate = + sum by (cluster,namespace,controller,pod_name,container_name,scope,type) ( + label_replace( + irate( + container_memory_failures_total{container_name!=""}[5m] + ), + "controller", "$1", + "pod_name", "^(.*)-[a-z0-9]+" + ) + ) + +cluster_namespace_controller_pod_container:memory_oom:rate = + sum by (cluster,namespace,controller,pod_name,container_name,scope,type) ( + label_replace( + irate( + container_memory_failcnt{container_name!=""}[5m] + ), + "controller", "$1", + "pod_name", "^(.*)-[a-z0-9]+" + ) + ) + +### Cluster resources ### + +cluster:memory_allocation:percent = + 100 * sum by (cluster) ( + container_spec_memory_limit_bytes{pod_name!=""} + ) / sum by (cluster) ( + machine_memory_bytes + ) + +cluster:memory_used:percent = + 100 * sum by (cluster) ( + container_memory_usage_bytes{pod_name!=""} + ) / sum by (cluster) ( + machine_memory_bytes + ) + +cluster:cpu_allocation:percent = + 100 * sum by (cluster) ( + container_spec_cpu_shares{pod_name!=""} + ) / sum by (cluster) ( + container_spec_cpu_shares{id="/"} * on(cluster,instance) machine_cpu_cores + ) + +cluster:node_cpu_use:percent = + 100 * sum by (cluster) ( + rate(node_cpu{mode!="idle"}[5m]) + ) / sum by (cluster) ( + machine_cpu_cores + ) + +### API latency ### + +# Raw metrics are in microseconds. Convert to seconds. 
+cluster_resource_verb:apiserver_latency:quantile_seconds{quantile="0.99"} = + histogram_quantile( + 0.99, + sum by(le,cluster,job,resource,verb) (apiserver_request_latencies_bucket) + ) / 1e6 +cluster_resource_verb:apiserver_latency:quantile_seconds{quantile="0.9"} = + histogram_quantile( + 0.9, + sum by(le,cluster,job,resource,verb) (apiserver_request_latencies_bucket) + ) / 1e6 +cluster_resource_verb:apiserver_latency:quantile_seconds{quantile="0.5"} = + histogram_quantile( + 0.5, + sum by(le,cluster,job,resource,verb) (apiserver_request_latencies_bucket) + ) / 1e6 + +### Scheduling latency ### + +cluster:scheduler_e2e_scheduling_latency:quantile_seconds{quantile="0.99"} = + histogram_quantile(0.99,sum by (le,cluster) (scheduler_e2e_scheduling_latency_microseconds_bucket)) / 1e6 +cluster:scheduler_e2e_scheduling_latency:quantile_seconds{quantile="0.9"} = + histogram_quantile(0.9,sum by (le,cluster) (scheduler_e2e_scheduling_latency_microseconds_bucket)) / 1e6 +cluster:scheduler_e2e_scheduling_latency:quantile_seconds{quantile="0.5"} = + histogram_quantile(0.5,sum by (le,cluster) (scheduler_e2e_scheduling_latency_microseconds_bucket)) / 1e6 + +cluster:scheduler_scheduling_algorithm_latency:quantile_seconds{quantile="0.99"} = + histogram_quantile(0.99,sum by (le,cluster) (scheduler_scheduling_algorithm_latency_microseconds_bucket)) / 1e6 +cluster:scheduler_scheduling_algorithm_latency:quantile_seconds{quantile="0.9"} = + histogram_quantile(0.9,sum by (le,cluster) (scheduler_scheduling_algorithm_latency_microseconds_bucket)) / 1e6 +cluster:scheduler_scheduling_algorithm_latency:quantile_seconds{quantile="0.5"} = + histogram_quantile(0.5,sum by (le,cluster) (scheduler_scheduling_algorithm_latency_microseconds_bucket)) / 1e6 + +cluster:scheduler_binding_latency:quantile_seconds{quantile="0.99"} = + histogram_quantile(0.99,sum by (le,cluster) (scheduler_binding_latency_microseconds_bucket)) / 1e6 +cluster:scheduler_binding_latency:quantile_seconds{quantile="0.9"} = + 
histogram_quantile(0.9,sum by (le,cluster) (scheduler_binding_latency_microseconds_bucket)) / 1e6 +cluster:scheduler_binding_latency:quantile_seconds{quantile="0.5"} = + histogram_quantile(0.5,sum by (le,cluster) (scheduler_binding_latency_microseconds_bucket)) / 1e6 + +ALERT K8SNodeDown + IF up{job="kubelets"} == 0 + FOR 1h + LABELS { + service = "k8s", + severity = "warning" + } + ANNOTATIONS { + summary = "Kubelet cannot be scraped", + description = "Prometheus could not scrape a {{ $labels.job }} for more than one hour", + } + +ALERT K8SNodeNotReady + IF kube_node_status_ready{condition="true"} == 0 + FOR 1h + LABELS { + service = "k8s", + severity = "warning", + } + ANNOTATIONS { + summary = "Node status is NotReady", + description = "The Kubelet on {{ $labels.node }} has not checked in with the API, or has set itself to NotReady, for more than an hour", + } + +ALERT K8SManyNodesNotReady + IF + count by (cluster) (kube_node_status_ready{condition="true"} == 0) > 1 + AND + ( + count by (cluster) (kube_node_status_ready{condition="true"} == 0) + / + count by (cluster) (kube_node_status_ready{condition="true"}) + ) > 0.2 + FOR 1m + LABELS { + service = "k8s", + severity = "critical", + } + ANNOTATIONS { + summary = "Many K8s nodes are Not Ready", + description = "{{ $value }} K8s nodes (more than 20% of cluster {{ $labels.cluster }}) are in the NotReady state.", + } + +ALERT K8SKubeletNodeExporterDown + IF up{job="node-exporter"} == 0 + FOR 15m + LABELS { + service = "k8s", + severity = "warning" + } + ANNOTATIONS { + summary = "Kubelet node_exporter cannot be scraped", + description = "Prometheus could not scrape a {{ $labels.job }} for more than 15 minutes.", + } + +ALERT K8SKubeletDown + IF absent(up{job="kubelets"}) or count by (cluster) (up{job="kubelets"} == 0) / count by (cluster) (up{job="kubelets"}) > 0.1 + FOR 1h + LABELS { + service = "k8s", + severity = "critical" + } + ANNOTATIONS { + summary = "Many Kubelets cannot be scraped", + description = 
"Prometheus failed to scrape more than 10% of kubelets, or all Kubelets have disappeared from service discovery.", + } + +ALERT K8SApiserverDown + IF up{job="kubernetes"} == 0 + FOR 15m + LABELS { + service = "k8s", + severity = "warning" + } + ANNOTATIONS { + summary = "API server unreachable", + description = "An API server could not be scraped.", + } + +# Disable for non HA kubernetes setups. +ALERT K8SApiserverDown + IF absent({job="kubernetes"}) or (count by(cluster) (up{job="kubernetes"} == 1) < count by(cluster) (up{job="kubernetes"})) + FOR 5m + LABELS { + service = "k8s", + severity = "critical" + } + ANNOTATIONS { + summary = "API server unreachable", + description = "Prometheus failed to scrape multiple API servers, or all API servers have disappeared from service discovery.", + } + +ALERT K8SSchedulerDown + IF absent(up{job="kube-scheduler"}) or (count by(cluster) (up{job="kube-scheduler"} == 1) == 0) + FOR 5m + LABELS { + service = "k8s", + severity = "critical", + } + ANNOTATIONS { + summary = "Scheduler is down", + description = "There is no running K8S scheduler. New pods are not being assigned to nodes.", + } + +ALERT K8SControllerManagerDown + IF absent(up{job="kube-controller-manager"}) or (count by(cluster) (up{job="kube-controller-manager"} == 1) == 0) + FOR 5m + LABELS { + service = "k8s", + severity = "critical", + } + ANNOTATIONS { + summary = "Controller manager is down", + description = "There is no running K8S controller manager. 
Deployments and replication controllers are not making progress.", + } + +ALERT K8SConntrackTableFull + IF 100*node_nf_conntrack_entries / node_nf_conntrack_entries_limit > 50 + FOR 10m + LABELS { + service = "k8s", + severity = "warning" + } + ANNOTATIONS { + summary = "Number of tracked connections is near the limit", + description = "The nf_conntrack table is {{ $value }}% full.", + } + +ALERT K8SConntrackTableFull + IF 100*node_nf_conntrack_entries / node_nf_conntrack_entries_limit > 90 + LABELS { + service = "k8s", + severity = "critical" + } + ANNOTATIONS { + summary = "Number of tracked connections is near the limit", + description = "The nf_conntrack table is {{ $value }}% full.", + } + +# To catch the conntrack sysctl de-tuning when it happens +ALERT K8SConntrackTuningMissing + IF node_nf_conntrack_udp_timeout > 10 + FOR 10m + LABELS { + service = "k8s", + severity = "warning", + } + ANNOTATIONS { + summary = "Node does not have the correct conntrack tunings", + description = "Nodes keep un-setting the correct tunings, investigate when it happens.", + } + +ALERT K8STooManyOpenFiles + IF 100*process_open_fds{job=~"kubelets|kubernetes"} / process_max_fds > 50 + FOR 10m + LABELS { + service = "k8s", + severity = "warning" + } + ANNOTATIONS { + summary = "{{ $labels.job }} has too many open file descriptors", + description = "{{ $labels.node }} is using {{ $value }}% of the available file/socket descriptors.", + } + +ALERT K8STooManyOpenFiles + IF 100*process_open_fds{job=~"kubelets|kubernetes"} / process_max_fds > 80 + FOR 10m + LABELS { + service = "k8s", + severity = "critical" + } + ANNOTATIONS { + summary = "{{ $labels.job }} has too many open file descriptors", + description = "{{ $labels.node }} is using {{ $value }}% of the available file/socket descriptors.", + } + +# Some verbs excluded because they are expected to be long-lasting: +# WATCHLIST is long-poll, CONNECT is `kubectl exec`. 
+ALERT K8SApiServerLatency + IF histogram_quantile( + 0.99, + sum without (instance,node,resource) (apiserver_request_latencies_bucket{verb!~"CONNECT|WATCHLIST|WATCH"}) + ) / 1e6 > 1.0 + FOR 10m + LABELS { + service = "k8s", + severity = "warning" + } + ANNOTATIONS { + summary = "Kubernetes apiserver latency is high", + description = "99th percentile Latency for {{ $labels.verb }} requests to the kube-apiserver is higher than 1s.", + } + +ALERT K8SApiServerEtcdAccessLatency + IF etcd_request_latencies_summary{quantile="0.99"} / 1e6 > 1.0 + FOR 15m + LABELS { + service = "k8s", + severity = "warning" + } + ANNOTATIONS { + summary = "Access to etcd is slow", + description = "99th percentile latency for apiserver to access etcd is higher than 1s.", + } + +ALERT K8SKubeletTooManyPods + IF kubelet_running_pod_count > 100 + LABELS { + service = "k8s", + severity = "warning", + } + ANNOTATIONS { + summary = "Kubelet is close to pod limit", + description = "Kubelet {{$labels.instance}} is running {{$value}} pods, close to the limit of 110", + } diff --git a/kube-prometheus/templates/NOTES.txt b/kube-prometheus/templates/NOTES.txt new file mode 100644 index 00000000..88fa073a --- /dev/null +++ b/kube-prometheus/templates/NOTES.txt @@ -0,0 +1,47 @@ +kube-prometheus has been installed. 
To create AlertManager & Prometheus, execute: + +bash -c 'cat <-kube-prometheus-alertmanager-main' + ## + # - name: main + + ## annotations for Alertmanager Ingress + ## + # annotations: + # kubernetes.io/ingress.class: nginx + # kubernetes.io/tls-acme: 'true' + + ## fully-qualified domain names of Alertmanager Ingress to create + ## + # hosts: + # - alertmanager.example.com + + ## TLS configuration for Alertmanager Ingress + ## secret must be manually created in the namespace + ## + # tls: + # - secretName: alertmanager-main-tls + # hosts: + # - alertmanager.example.com + + service: + ## annotations for Alertmanager Service + ## + # annotations: + + ## Alertmanager Service port to expose on each node + ## only used if alertmanager.service.type is 'NodePort' + ## + nodePort: 30903 + + ## Alertmanager Service type + ## + type: NodePort + + ## Alertmanager StorageSpec for persistent data + ## Ref: https://github.com/coreos/prometheus-operator/blob/master/Documentation/prometheus.md#storagespec + ## + # storageSpec: + # class: default + # selector: "" + # resources: + # requests: + # storage: 4Gi + +grafana: + ## Grafana image + ## + image: + repository: grafana/grafana + tag: 3.1.1 + pullPolicy: IfNotPresent + + # ingress: + ## name to use when creating Grafana Ingress + ## Ex: 'grafana' will create Ingress named '-kube-prometheus-grafana' + ## + # - name: grafana + + ## annotations for Grafana Ingress + ## + # annotations: + # kubernetes.io/ingress.class: nginx + # kubernetes.io/tls-acme: 'true' + + ## fully-qualified domain names of Grafana Ingress to create + ## + # hosts: + # - grafana.example.com + + ## TLS configuration for Grafana Ingress + ## secret must be manually created in the namespace + ## + # tls: + # - secretName: grafana-tls + # hosts: + # - grafana.example.com + + ## Grafana resource limits & requests + ## Ref: https://kubernetes.io/docs/user-guide/compute-resources/ + ## + resources: + limits: + cpu: 300m + memory: 300Mi + requests: + cpu: 100m + 
memory: 100Mi + + service: + ## annotations for Grafana Service + ## + # annotations: + + ## Grafana Service port to expose on each node + ## only used if grafana.service.type is 'NodePort' + ## + nodePort: 30902 + + ## Grafana Service type + ## + type: NodePort + +grafanaWatcher: + ## grafana-watcher enables the loading of Grafana dashboards from a ConfigMap + ## Ref: https://github.com/coreos/kube-prometheus#dashboarding + ## + image: + repository: quay.io/coreos/grafana-watcher + tag: latest + pullPolicy: Always + + ## grafana-watcher resource limits & requests + ## Ref: https://kubernetes.io/docs/user-guide/compute-resources/ + ## + resources: + limits: + cpu: 100m + memory: 32Mi + requests: + cpu: 50m + memory: 16Mi + +kubeStateMetrics: + ## kube-state-metrics generates & exposes cluster-level Prometheus metrics + ## Ref: https://github.com/kubernetes/kube-state-metrics + ## + image: + repository: gcr.io/google_containers/kube-state-metrics + tag: v0.3.0 + pullPolicy: IfNotPresent + + ## kube-state-metrics resource limits & requests + ## Ref: https://kubernetes.io/docs/user-guide/compute-resources/ + ## + resources: + limits: + cpu: 200m + memory: 50Mi + requests: + cpu: 100m + memory: 30Mi + +nodeExporter: + ## Prometheus Node Exporter image + ## + image: + repository: quay.io/prometheus/node-exporter + tag: v0.13.0 + pullPolicy: IfNotPresent + + ## Prometheus Node Exporter resource limits & requests + ## Ref: https://kubernetes.io/docs/user-guide/compute-resources/ + ## + resources: + limits: + cpu: 200m + memory: 50Mi + requests: + cpu: 100m + memory: 30Mi + + service: + ## Prometheus Node Exporter Service port to expose on each node + ## + nodePort: 9100 + +prometheus: + ## name to use when creating Prometheus StatefulSet & Service + ## Ex: 'k8s' will create resources named 'prometheus-k8s' + ## + name: k8s + + ## if true, delete Prometheus StatefulSet & Service when release is deleted + ## + cleanup: true + + ## how long to retain metrics + ## + 
retention: 24h + + ## version of Prometheus to install + ## + version: v1.4.1 + + ## Prometheus resource limits & requests + ## Ref: https://kubernetes.io/docs/user-guide/compute-resources/ + ## + resources: + requests: + memory: 400Mi + + # ingress: + ## name to use when creating Prometheus Ingress + ## Ex: 'k8s' will create Ingress named '-kube-prometheus-prometheus-k8s' + ## + # - name: k8s + + ## annotations for Prometheus Ingress + ## + # annotations: + # kubernetes.io/ingress.class: nginx + # kubernetes.io/tls-acme: 'true' + + ## fully-qualified domain names of Prometheus Ingress to create + ## + # hosts: + # - prometheus.example.com + + ## TLS configuration for Prometheus Ingress + ## secret must be manually created in the namespace + ## + # tls: + # - secretName: prometheus-k8s-tls + # hosts: + # - prometheus.example.com + + service: + ## annotations for Prometheus Service + ## + # annotations: + + ## Prometheus Service port to expose on each node + ## only used if prometheus.service.type is 'NodePort' + ## + nodePort: 30900 + + ## Prometheus Service type + ## + type: NodePort + + ## Prometheus StorageSpec for persistent data + ## Ref: https://github.com/coreos/prometheus-operator/blob/master/Documentation/prometheus.md#storagespec + ## + # storageSpec: + # class: default + # selector: "" + # resources: + # requests: + # storage: 16Gi + +prometheus-operator: + ## prometheus-operator image + ## + image: + repository: quay.io/coreos/prometheus-operator + tag: v0.2.1 + pullPolicy: IfNotPresent + + ## collect & send anonymous usage statistics + ## Ref: https://github.com/coreos/prometheus-operator#installation + ## + sendAnalytics: true + + ## prometheus-operator resource limits & requests + ## Ref: https://kubernetes.io/docs/user-guide/compute-resources/ + ## + resources: + limits: + cpu: 200m + memory: 300Mi + requests: + cpu: 100m + memory: 50Mi diff --git a/prometheus-operator/.helmignore b/prometheus-operator/.helmignore new file mode 100644 index 
00000000..f0c13194 --- /dev/null +++ b/prometheus-operator/.helmignore @@ -0,0 +1,21 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*~ +# Various IDEs +.project +.idea/ +*.tmproj diff --git a/prometheus-operator/Chart.yaml b/prometheus-operator/Chart.yaml new file mode 100644 index 00000000..e6f9fee7 --- /dev/null +++ b/prometheus-operator/Chart.yaml @@ -0,0 +1,9 @@ +apiVersion: v1 +description: Provides easy monitoring definitions for Kubernetes services, and deployment and management of Prometheus instances. +engine: gotpl +home: https://github.com/coreos/prometheus-operator +maintainers: + - name: Michael Goodness + email: mgoodness@gmail.com +name: prometheus-operator +version: 0.1.1 diff --git a/prometheus-operator/templates/NOTES.txt b/prometheus-operator/templates/NOTES.txt new file mode 100644 index 00000000..42b5cd73 --- /dev/null +++ b/prometheus-operator/templates/NOTES.txt @@ -0,0 +1,6 @@ +The Prometheus Operator has been installed. Check its status by running: + kubectl --namespace {{ .Release.Namespace }} get pods \ + -l "app={{ template "name" . }},release={{ .Release.Name }}" + +Visit https://github.com/coreos/prometheus-operator for instructions on how +to create & configure Alertmanager and Prometheus instances using the Operator. diff --git a/prometheus-operator/templates/_helpers.tpl b/prometheus-operator/templates/_helpers.tpl new file mode 100644 index 00000000..f0d83d2e --- /dev/null +++ b/prometheus-operator/templates/_helpers.tpl @@ -0,0 +1,16 @@ +{{/* vim: set filetype=mustache: */}} +{{/* +Expand the name of the chart. 
+*/}} +{{- define "name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +*/}} +{{- define "fullname" -}} +{{- $name := default .Chart.Name .Values.nameOverride -}} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} +{{- end -}} diff --git a/prometheus-operator/templates/delete-tprs-job.yaml b/prometheus-operator/templates/delete-tprs-job.yaml new file mode 100644 index 00000000..2b401d35 --- /dev/null +++ b/prometheus-operator/templates/delete-tprs-job.yaml @@ -0,0 +1,33 @@ +apiVersion: batch/v1 +kind: Job +metadata: + annotations: + helm.sh/hook: post-delete + labels: + app: {{ template "name" . }} + chart: {{ .Chart.Name }}-{{ .Chart.Version }} + heritage: {{ .Release.Service }} + release: {{ .Release.Name }} + name: {{ template "fullname" . }}-delete-tprs +spec: + template: + metadata: + labels: + app: {{ template "name" . }} + release: {{ .Release.Name }} + name: {{ template "fullname" . 
}}-delete-tprs + spec: + containers: + - name: delete-alertmanager-tpr + image: "{{ .Values.global.hyperkube.repository }}:{{ .Values.global.hyperkube.tag }}" + imagePullPolicy: "{{ .Values.global.hyperkube.pullPolicy }}" + command: ["./kubectl","-n","{{ .Release.Namespace }}","delete","thirdpartyresource/alertmanager.monitoring.coreos.com"] + - name: delete-prometheus-tpr + image: "{{ .Values.global.hyperkube.repository }}:{{ .Values.global.hyperkube.tag }}" + imagePullPolicy: "{{ .Values.global.hyperkube.pullPolicy }}" + command: ["./kubectl","-n","{{ .Release.Namespace }}","delete","thirdpartyresource/prometheus.monitoring.coreos.com"] + - name: delete-servicemonitor-tpr + image: "{{ .Values.global.hyperkube.repository }}:{{ .Values.global.hyperkube.tag }}" + imagePullPolicy: "{{ .Values.global.hyperkube.pullPolicy }}" + command: ["./kubectl","-n","{{ .Release.Namespace }}","delete","thirdpartyresource/service-monitor.monitoring.coreos.com"] + restartPolicy: Never diff --git a/prometheus-operator/templates/deployment.yaml b/prometheus-operator/templates/deployment.yaml new file mode 100644 index 00000000..cbcf112c --- /dev/null +++ b/prometheus-operator/templates/deployment.yaml @@ -0,0 +1,29 @@ +apiVersion: extensions/v1beta1 +kind: Deployment +metadata: + labels: + app: {{ template "name" . }} + chart: {{ .Chart.Name }}-{{ .Chart.Version }} + heritage: {{ .Release.Service }} + operator: prometheus + release: {{ .Release.Name }} + name: {{ template "fullname" . }} +spec: + replicas: 1 + template: + metadata: + labels: + app: {{ template "name" . }} + operator: prometheus + release: {{ .Release.Name }} + spec: + containers: + - name: {{ template "name" . 
}} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" + imagePullPolicy: "{{ .Values.image.pullPolicy }}" + {{- if not .Values.sendAnalytics }} + args: + - --analytics=false + {{- end }} + resources: +{{ toYaml .Values.resources | indent 12 }} diff --git a/prometheus-operator/templates/get-tprs-job.yaml b/prometheus-operator/templates/get-tprs-job.yaml new file mode 100644 index 00000000..a5ada005 --- /dev/null +++ b/prometheus-operator/templates/get-tprs-job.yaml @@ -0,0 +1,33 @@ +apiVersion: batch/v1 +kind: Job +metadata: + annotations: + helm.sh/hook: post-install + labels: + app: {{ template "name" . }} + chart: {{ .Chart.Name }}-{{ .Chart.Version }} + heritage: {{ .Release.Service }} + release: {{ .Release.Name }} + name: {{ template "fullname" . }}-get-tprs +spec: + template: + metadata: + labels: + app: {{ template "name" . }} + release: {{ .Release.Name }} + name: {{ template "fullname" . }}-get-tprs + spec: + containers: + - name: get-alertmanager-tpr + image: "{{ .Values.global.hyperkube.repository }}:{{ .Values.global.hyperkube.tag }}" + imagePullPolicy: "{{ .Values.global.hyperkube.pullPolicy }}" + command: ["./kubectl","-n","{{ .Release.Namespace }}","get","alertmanager"] + - name: get-prometheus-tpr + image: "{{ .Values.global.hyperkube.repository }}:{{ .Values.global.hyperkube.tag }}" + imagePullPolicy: "{{ .Values.global.hyperkube.pullPolicy }}" + command: ["./kubectl","-n","{{ .Release.Namespace }}","get","prometheus"] + - name: get-servicemonitor-tpr + image: "{{ .Values.global.hyperkube.repository }}:{{ .Values.global.hyperkube.tag }}" + imagePullPolicy: "{{ .Values.global.hyperkube.pullPolicy }}" + command: ["./kubectl","-n","{{ .Release.Namespace }}","get","servicemonitor"] + restartPolicy: OnFailure diff --git a/prometheus-operator/values.yaml b/prometheus-operator/values.yaml new file mode 100644 index 00000000..00bdf20c --- /dev/null +++ b/prometheus-operator/values.yaml @@ -0,0 +1,31 @@ +global: + ## hyperkube image to use 
when getting/deleting ThirdPartyResources + ## created by prometheus-operator. + ## + hyperkube: + repository: quay.io/coreos/hyperkube + tag: v1.5.2_coreos.1 + pullPolicy: IfNotPresent + +## prometheus-operator image +## +image: + repository: quay.io/coreos/prometheus-operator + tag: v0.2.1 + pullPolicy: IfNotPresent + +## if true, collect & send anonymous usage statistics +## Ref: https://github.com/coreos/prometheus-operator#installation +## +sendAnalytics: true + +## prometheus-operator resource limits & requests +## Ref: https://kubernetes.io/docs/user-guide/compute-resources/ +## +resources: + limits: + cpu: 200m + memory: 300Mi + requests: + cpu: 100m + memory: 50Mi From d1ef0708cd3793a3031116af8b72d0c96211fbd1 Mon Sep 17 00:00:00 2001 From: Michael Goodness Date: Mon, 23 Jan 2017 14:57:55 -0600 Subject: [PATCH 02/41] Renamed Deployment dashboard -> Deployments --- .../{deployment-dashboard.json => deployments-dashboard.json} | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) rename kube-prometheus/grafana/{deployment-dashboard.json => deployments-dashboard.json} (99%) diff --git a/kube-prometheus/grafana/deployment-dashboard.json b/kube-prometheus/grafana/deployments-dashboard.json similarity index 99% rename from kube-prometheus/grafana/deployment-dashboard.json rename to kube-prometheus/grafana/deployments-dashboard.json index db6b240c..720e9903 100644 --- a/kube-prometheus/grafana/deployment-dashboard.json +++ b/kube-prometheus/grafana/deployments-dashboard.json @@ -37,7 +37,7 @@ } ], "id": null, - "title": "Deployment", + "title": "Deployments", "tags": [], "style": "dark", "timezone": "browser", @@ -782,7 +782,7 @@ "datasource": "${DS_PROMETHEUS}", "hide": 0, "includeAll": false, - "label": "Deployment", + "label": "Deployments", "multi": false, "name": "deployment_name", "options": [], From 0fc1109ff650aeefe6a012dafdffa14a32822511 Mon Sep 17 00:00:00 2001 From: Michael Goodness Date: Tue, 24 Jan 2017 09:04:47 -0600 Subject: [PATCH 03/41] Added 
Makefile --- Makefile | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 Makefile diff --git a/Makefile b/Makefile new file mode 100644 index 00000000..6e7b51bb --- /dev/null +++ b/Makefile @@ -0,0 +1,14 @@ +kube-prometheus: prometheus-operator kube-prometheus-*.tgz + +prometheus-operator: prometheus-operator-*.tgz + +kube-prometheus-*.tgz: + cp prometheus-operator-*.tgz kube-prometheus/charts + helm package kube-prometheus + +prometheus-operator-*.tgz: + helm package prometheus-operator + +clean: + rm -f kube-prometheus/charts/* + rm -f *.tgz From b6babb84585db18577a9e7d5f651bc25ef7bd896 Mon Sep 17 00:00:00 2001 From: Michael Goodness Date: Tue, 24 Jan 2017 11:36:23 -0600 Subject: [PATCH 04/41] prometheus-operator v0.1.2 * use prometheus-operator v0.2.3 --- prometheus-operator/Chart.yaml | 2 +- prometheus-operator/values.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/prometheus-operator/Chart.yaml b/prometheus-operator/Chart.yaml index e6f9fee7..ac020180 100644 --- a/prometheus-operator/Chart.yaml +++ b/prometheus-operator/Chart.yaml @@ -6,4 +6,4 @@ maintainers: - name: Michael Goodness email: mgoodness@gmail.com name: prometheus-operator -version: 0.1.1 +version: 0.1.2 diff --git a/prometheus-operator/values.yaml b/prometheus-operator/values.yaml index 00bdf20c..a1c5a6d7 100644 --- a/prometheus-operator/values.yaml +++ b/prometheus-operator/values.yaml @@ -11,7 +11,7 @@ global: ## image: repository: quay.io/coreos/prometheus-operator - tag: v0.2.1 + tag: v0.2.3 pullPolicy: IfNotPresent ## if true, collect & send anonymous usage statistics From 89ff91b278931c6d3a888766dd71ef5a689b919b Mon Sep 17 00:00:00 2001 From: Michael Goodness Date: Tue, 24 Jan 2017 11:37:50 -0600 Subject: [PATCH 05/41] kube-prometheus v0.1.1 * use prometheus-operator v0.2.3 * use Grafana v4.1.1 * use Prometheus v1.5.0 * updated TLS validation settings --- kube-prometheus/Chart.yaml | 2 +- .../templates/prometheus-k8s-configmap.yaml | 18 
+++++++++--------- kube-prometheus/values.yaml | 6 +++--- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/kube-prometheus/Chart.yaml b/kube-prometheus/Chart.yaml index 5232ed0e..2a5d1179 100644 --- a/kube-prometheus/Chart.yaml +++ b/kube-prometheus/Chart.yaml @@ -6,4 +6,4 @@ maintainers: - name: Michael Goodness email: mgoodness@gmail.com name: kube-prometheus -version: 0.1.0 +version: 0.1.1 diff --git a/kube-prometheus/templates/prometheus-k8s-configmap.yaml b/kube-prometheus/templates/prometheus-k8s-configmap.yaml index 1aec7275..6c0312cd 100644 --- a/kube-prometheus/templates/prometheus-k8s-configmap.yaml +++ b/kube-prometheus/templates/prometheus-k8s-configmap.yaml @@ -45,12 +45,12 @@ data: scheme: https tls_config: ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt - # Skip verification until we have resolved why the certificate validation - # for the kubelet on API server nodes fail. + # Kubelet certificates fail validation because they don't include + # external IP address. Circumvent for now. insecure_skip_verify: true - # Scrapes the endpoint lists for the kube-dns server. Which we consider - # part of a default setup. + # Scrape the endpoint lists for kube-apiserver, kube-controller-manager, + # kube-dns and kube-scheduler. - job_name: kube-components bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token kubernetes_sd_configs: @@ -72,9 +72,12 @@ data: target_label: __scheme__ tls_config: ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + # API server certificates fail validation because they don't + # include external IP address. Circumvent for now. + insecure_skip_verify: true - # Scrapes the endpoint lists for the Kubernetes API server, kube-state-metrics, - # and node-exporter, which we all consider part of a default setup. + # Scrape the endpoint lists for Prometheus, node-exporters, and + # kube-state-metrics. 
- job_name: standard-endpoints bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token kubernetes_sd_configs: @@ -88,6 +91,3 @@ data: target_label: job tls_config: ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt - # As for kubelets, certificate validation fails for the API server (node) - # and we circumvent it for now. - insecure_skip_verify: true diff --git a/kube-prometheus/values.yaml b/kube-prometheus/values.yaml index 52c258f0..26542e50 100644 --- a/kube-prometheus/values.yaml +++ b/kube-prometheus/values.yaml @@ -80,7 +80,7 @@ grafana: ## image: repository: grafana/grafana - tag: 3.1.1 + tag: 4.1.1 pullPolicy: IfNotPresent # ingress: @@ -213,7 +213,7 @@ prometheus: ## version of Prometheus to install ## - version: v1.4.1 + version: v1.5.0 ## Prometheus resource limits & requests ## Ref: https://kubernetes.io/docs/user-guide/compute-resources/ @@ -276,7 +276,7 @@ prometheus-operator: ## image: repository: quay.io/coreos/prometheus-operator - tag: v0.2.1 + tag: v0.2.3 pullPolicy: IfNotPresent ## collect & send anonymous usage statistics From acda5cac24cd69b41eaf6b616fb82fd8109fd39e Mon Sep 17 00:00:00 2001 From: Michael Goodness Date: Thu, 26 Jan 2017 09:16:31 -0600 Subject: [PATCH 06/41] grafana: added Prometheus Stats dashboard --- kube-prometheus/grafana/prometheus-stats.json | 742 ++++++++++++++++++ 1 file changed, 742 insertions(+) create mode 100644 kube-prometheus/grafana/prometheus-stats.json diff --git a/kube-prometheus/grafana/prometheus-stats.json b/kube-prometheus/grafana/prometheus-stats.json new file mode 100644 index 00000000..bd31d5b9 --- /dev/null +++ b/kube-prometheus/grafana/prometheus-stats.json @@ -0,0 +1,742 @@ +{ + "__inputs": [ + { + "name": "DS_PROMETHEUS", + "label": "prometheus", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "4.1.1" + }, + { + 
"type": "panel", + "id": "graph", + "name": "Graph", + "version": "" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + }, + { + "type": "panel", + "id": "singlestat", + "name": "Singlestat", + "version": "" + }, + { + "type": "panel", + "id": "text", + "name": "Text", + "version": "" + } + ], + "annotations": { + "list": [] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": true, + "id": null, + "links": [ + { + "icon": "info", + "tags": [], + "targetBlank": true, + "title": "Grafana Docs", + "tooltip": "", + "type": "link", + "url": "http://www.grafana.org/docs" + }, + { + "icon": "info", + "tags": [], + "targetBlank": true, + "title": "Prometheus Docs", + "type": "link", + "url": "http://prometheus.io/docs/introduction/overview/" + } + ], + "refresh": "30s", + "revision": "1.0", + "rows": [ + { + "collapse": false, + "height": 178, + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_PROMETHEUS}", + "decimals": 1, + "editable": true, + "error": false, + "format": "s", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 5, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 3, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "expr": "(time() - 
process_start_time_seconds{job=\"prometheus-k8s\"})", + "intervalFactor": 2, + "refId": "A", + "step": 60 + } + ], + "thresholds": "", + "title": "Uptime", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 6, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 3, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "targets": [ + { + "expr": "prometheus_local_storage_memory_series", + "intervalFactor": 2, + "refId": "A", + "step": 60 + } + ], + "thresholds": "1,5", + "title": "Local Storage Memory Series", + "type": "singlestat", + "valueFontSize": "70%", + "valueMaps": [], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": true, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + 
"id": 7, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 3, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "targets": [ + { + "expr": "prometheus_local_storage_indexing_queue_length", + "intervalFactor": 2, + "refId": "A", + "step": 60 + } + ], + "thresholds": "500,4000", + "title": "Internal Storage Queue Length", + "type": "singlestat", + "valueFontSize": "70%", + "valueMaps": [ + { + "op": "=", + "text": "Empty", + "value": "0" + } + ], + "valueName": "current" + }, + { + "content": "\"Prometheus\nPrometheus\n\n

You're using Prometheus, an open-source systems monitoring and alerting toolkit originally built at SoundCloud. For more information, check out the Grafana and Prometheus projects.

", + "editable": true, + "error": false, + "id": 9, + "links": [], + "mode": "html", + "span": 3, + "style": {}, + "title": "", + "transparent": true, + "type": "text" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "New row", + "titleSize": "h6" + }, + { + "collapse": false, + "height": 227, + "panels": [ + { + "aliasColors": { + "prometheus": "#C15C17", + "{instance=\"localhost:9090\",job=\"prometheus\"}": "#C15C17" + }, + "bars": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 9, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(prometheus_local_storage_ingested_samples_total[5m])", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{job}}", + "metric": "", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Samples ingested (rate-5m)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "content": "#### Samples Ingested\nThis graph displays the count of samples ingested by the Prometheus server, as measured over the last 5 minutes, per time series in the range vector. 
When troubleshooting an issue on IRC or Github, this is often the first stat requested by the Prometheus team. ", + "editable": true, + "error": false, + "id": 8, + "links": [], + "mode": "markdown", + "span": 2.995914043583536, + "style": {}, + "title": "", + "transparent": true, + "type": "text" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "New row", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + "prometheus": "#F9BA8F", + "{instance=\"localhost:9090\",interval=\"5s\",job=\"prometheus\"}": "#F9BA8F" + }, + "bars": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 5, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(prometheus_target_interval_length_seconds_count[5m])", + "intervalFactor": 2, + "legendFormat": "{{job}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Target Scrapes (last 5m)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 14, + "legend": { + "avg": 
false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "prometheus_target_interval_length_seconds{quantile!=\"0.01\", quantile!=\"0.05\"}", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{quantile}} ({{interval}})", + "metric": "", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Scrape Duration", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "content": "#### Scrapes\nPrometheus scrapes metrics from instrumented jobs, either directly or via an intermediary push gateway for short-lived jobs. Target scrapes will show how frequently targets are scraped, as measured over the last 5 minutes, per time series in the range vector. Scrape Duration will show how long the scrapes are taking, with percentiles available as series. 
", + "editable": true, + "error": false, + "id": 11, + "links": [], + "mode": "markdown", + "span": 3, + "style": {}, + "title": "", + "transparent": true, + "type": "text" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "New row", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS}", + "decimals": null, + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 12, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 9, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "prometheus_evaluator_duration_seconds{quantile!=\"0.01\", quantile!=\"0.05\"}", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{quantile}}", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Rule Eval Duration", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percentunit", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "content": "#### Rule Evaluation Duration\nThis graph panel plots the duration for all evaluations to execute. 
The 50th percentile, 90th percentile and 99th percentile are shown as three separate series to help identify outliers that may be skewing the data.", + "editable": true, + "error": false, + "id": 15, + "links": [], + "mode": "markdown", + "span": 3, + "style": {}, + "title": "", + "transparent": true, + "type": "text" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "New row", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "prometheus" + ], + "templating": { + "list": [] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "now": true, + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Prometheus Stats", + "version": 1 +} From 11a9fb8b4ef740db24b8fde1b7ad4ca3410c89fd Mon Sep 17 00:00:00 2001 From: Michael Goodness Date: Thu, 26 Jan 2017 09:17:08 -0600 Subject: [PATCH 07/41] grafana: updated dashboards to v4.1.1 --- .../grafana/all-nodes-dashboard.json | 239 ++---- .../grafana/deployments-dashboard.json | 681 ++++++++------- .../grafana/kubernetes-pods-dashboard.json | 796 +++++++++--------- kube-prometheus/grafana/node-dashboard.json | 267 +++--- 4 files changed, 933 insertions(+), 1050 deletions(-) diff --git a/kube-prometheus/grafana/all-nodes-dashboard.json b/kube-prometheus/grafana/all-nodes-dashboard.json index c0bbe251..21eaadf3 100644 --- a/kube-prometheus/grafana/all-nodes-dashboard.json +++ b/kube-prometheus/grafana/all-nodes-dashboard.json @@ -1,5 +1,4 @@ { - "dashboard": { "__inputs": [ { "name": "DS_PROMETHEUS", @@ -12,56 +11,47 @@ ], "__requires": [ { - "type": "panel", - "id": "graph", - "name": "Graph", - "version": "" + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "4.1.1" }, { "type": "panel", - "id": 
"singlestat", - "name": "Singlestat", + "id": "graph", + "name": "Graph", "version": "" }, - { - "type": "grafana", - "id": "grafana", - "name": "Grafana", - "version": "3.1.1" - }, { "type": "datasource", "id": "prometheus", "name": "Prometheus", "version": "1.0.0" + }, + { + "type": "panel", + "id": "singlestat", + "name": "Singlestat", + "version": "" } ], - "id": null, - "title": "All Nodes", + "annotations": { + "list": [] + }, "description": "Dashboard to get an overview of one server", - "tags": [ - "prometheus" - ], - "style": "dark", - "timezone": "browser", "editable": true, + "gnetId": 22, + "graphTooltip": 0, "hideControls": false, - "sharedCrosshair": false, + "id": null, + "links": [], + "refresh": "30s", "rows": [ { "collapse": false, - "editable": true, "height": "250px", "panels": [ { - "alert": { - "crit": { - "op": ">" - }, - "warn": { - "op": ">" - } - }, "alerting": {}, "aliasColors": {}, "bars": false, @@ -69,20 +59,14 @@ "editable": true, "error": false, "fill": 1, - "grid": { - "threshold1": null, - "threshold1Color": "rgba(216, 200, 27, 0.27)", - "threshold2": null, - "threshold2Color": "rgba(234, 112, 112, 0.22)" - }, + "grid": {}, "id": 3, - "isNew": true, "legend": { "avg": false, "current": false, "max": false, "min": false, - "show": true, + "show": false, "total": false, "values": false }, @@ -120,7 +104,10 @@ }, "type": "graph", "xaxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "yaxes": [ { @@ -142,14 +129,6 @@ ] }, { - "alert": { - "crit": { - "op": ">" - }, - "warn": { - "op": ">" - } - }, "alerting": {}, "aliasColors": {}, "bars": false, @@ -157,14 +136,8 @@ "editable": true, "error": false, "fill": 1, - "grid": { - "threshold1": null, - "threshold1Color": "rgba(216, 200, 27, 0.27)", - "threshold2": null, - "threshold2Color": "rgba(234, 112, 112, 0.22)" - }, + "grid": {}, "id": 9, - "isNew": true, "legend": { "avg": false, "current": false, @@ -224,7 +197,10 @@ }, "type": "graph", 
"xaxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "yaxes": [ { @@ -246,22 +222,18 @@ ] } ], - "title": "New row" + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "New row", + "titleSize": "h6" }, { "collapse": false, - "editable": true, "height": "250px", "panels": [ { - "alert": { - "crit": { - "op": ">" - }, - "warn": { - "op": ">" - } - }, "alerting": {}, "aliasColors": {}, "bars": false, @@ -269,14 +241,8 @@ "editable": true, "error": false, "fill": 1, - "grid": { - "threshold1": null, - "threshold1Color": "rgba(216, 200, 27, 0.27)", - "threshold2": null, - "threshold2Color": "rgba(234, 112, 112, 0.22)" - }, + "grid": {}, "id": 4, - "isNew": true, "legend": { "avg": false, "current": false, @@ -326,7 +292,10 @@ }, "type": "graph", "xaxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "yaxes": [ { @@ -369,7 +338,6 @@ }, "id": 5, "interval": null, - "isNew": true, "links": [], "mappingType": 1, "mappingTypes": [ @@ -426,22 +394,18 @@ "valueName": "avg" } ], - "title": "New row" + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "New row", + "titleSize": "h6" }, { "collapse": false, - "editable": true, "height": "250px", "panels": [ { - "alert": { - "crit": { - "op": ">" - }, - "warn": { - "op": ">" - } - }, "alerting": {}, "aliasColors": {}, "bars": false, @@ -449,14 +413,8 @@ "editable": true, "error": false, "fill": 1, - "grid": { - "threshold1": null, - "threshold1Color": "rgba(216, 200, 27, 0.27)", - "threshold2": null, - "threshold2Color": "rgba(234, 112, 112, 0.22)" - }, + "grid": {}, "id": 6, - "isNew": true, "legend": { "avg": false, "current": false, @@ -528,7 +486,10 @@ }, "type": "graph", "xaxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "yaxes": [ { @@ -571,7 +532,6 @@ }, "id": 7, "interval": null, - "isNew": true, "links": 
[], "mappingType": 1, "mappingTypes": [ @@ -628,22 +588,18 @@ "valueName": "current" } ], - "title": "New row" + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "New row", + "titleSize": "h6" }, { "collapse": false, - "editable": true, "height": "250px", "panels": [ { - "alert": { - "crit": { - "op": ">" - }, - "warn": { - "op": ">" - } - }, "alerting": {}, "aliasColors": {}, "bars": false, @@ -651,20 +607,14 @@ "editable": true, "error": false, "fill": 1, - "grid": { - "threshold1": null, - "threshold1Color": "rgba(216, 200, 27, 0.27)", - "threshold2": null, - "threshold2Color": "rgba(234, 112, 112, 0.22)" - }, + "grid": {}, "id": 8, - "isNew": true, "legend": { "avg": false, "current": false, "max": false, "min": false, - "show": true, + "show": false, "total": false, "values": false }, @@ -708,7 +658,10 @@ }, "type": "graph", "xaxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "yaxes": [ { @@ -730,14 +683,6 @@ ] }, { - "alert": { - "crit": { - "op": ">" - }, - "warn": { - "op": ">" - } - }, "alerting": {}, "aliasColors": {}, "bars": false, @@ -745,20 +690,14 @@ "editable": true, "error": false, "fill": 1, - "grid": { - "threshold1": null, - "threshold1Color": "rgba(216, 200, 27, 0.27)", - "threshold2": null, - "threshold2Color": "rgba(234, 112, 112, 0.22)" - }, + "grid": {}, "id": 10, - "isNew": true, "legend": { "avg": false, "current": false, "max": false, "min": false, - "show": true, + "show": false, "total": false, "values": false }, @@ -802,7 +741,10 @@ }, "type": "graph", "xaxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "yaxes": [ { @@ -824,9 +766,22 @@ ] } ], - "title": "New row" + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "New row", + "titleSize": "h6" } ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "prometheus" + ], + "templating": { + "list": [] + 
}, "time": { "from": "now-1h", "to": "now" @@ -856,25 +811,7 @@ "30d" ] }, - "templating": { - "list": [] - }, - "annotations": { - "list": [] - }, - "refresh": false, - "schemaVersion": 12, - "version": 10, - "links": [], - "gnetId": 22 -}, - "inputs": [ - { - "name": "DS_PROMETHEUS", - "pluginId": "prometheus", - "type": "datasource", - "value": "prometheus" - } - ], - "overwrite": true + "timezone": "browser", + "title": "All Nodes", + "version": 2 } diff --git a/kube-prometheus/grafana/deployments-dashboard.json b/kube-prometheus/grafana/deployments-dashboard.json index 720e9903..0d0a61a5 100644 --- a/kube-prometheus/grafana/deployments-dashboard.json +++ b/kube-prometheus/grafana/deployments-dashboard.json @@ -1,5 +1,4 @@ { - "dashboard": { "__inputs": [ { "name": "DS_PROMETHEUS", @@ -12,10 +11,10 @@ ], "__requires": [ { - "type": "panel", - "id": "singlestat", - "name": "Singlestat", - "version": "" + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "4.1.1" }, { "type": "panel", @@ -23,65 +22,134 @@ "name": "Graph", "version": "" }, - { - "type": "grafana", - "id": "grafana", - "name": "Grafana", - "version": "3.1.1" - }, { "type": "datasource", "id": "prometheus", "name": "Prometheus", "version": "1.0.0" + }, + { + "type": "panel", + "id": "singlestat", + "name": "Singlestat", + "version": "" } ], - "id": null, - "title": "Deployments", - "tags": [], - "style": "dark", - "timezone": "browser", + "annotations": { + "list": [] + }, "editable": true, + "gnetId": null, + "graphTooltip": 1, "hideControls": false, - "sharedCrosshair": true, + "id": null, + "links": [], + "refresh": "30s", "rows": [ { "collapse": false, - "editable": true, "height": "200px", "panels": [ { - "title": "CPU", - "error": false, - "span": 4, + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_PROMETHEUS}", 
"editable": true, - "type": "singlestat", - "isNew": true, + "error": false, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, "id": 8, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "cores", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 4, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, "targets": [ { - "refId": "A", "expr": "sum(rate(container_cpu_usage_seconds_total{namespace=\"$deployment_namespace\",pod_name=~\"$deployment_name.*\"}[3m])) ", "intervalFactor": 2, - "step": 600 + "refId": "A", + "step": 60 } ], - "links": [], - "datasource": "${DS_PROMETHEUS}", - "maxDataPoints": 100, - "interval": null, - "cacheTimeout": null, - "format": "none", - "prefix": "", - "postfix": "cores", - "nullText": null, + "thresholds": "", + "title": "CPU", + "type": "singlestat", + "valueFontSize": "110%", "valueMaps": [ { - "value": "null", "op": "=", - "text": "N/A" + "text": "N/A", + "value": "null" } ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 9, + "interval": null, + "links": [], + "mappingType": 1, "mappingTypes": [ { "name": "value to text", @@ -92,73 +160,72 @@ "value": 2 } 
], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "GB", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "80%", "rangeMaps": [ { "from": "null", - "to": "null", - "text": "N/A" + "text": "N/A", + "to": "null" } ], - "mappingType": 1, - "nullPointMode": "connected", - "valueName": "avg", - "prefixFontSize": "50%", - "valueFontSize": "110%", - "postfixFontSize": "50%", - "thresholds": "", - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], + "span": 4, "sparkline": { - "show": true, + "fillColor": "rgba(31, 118, 189, 0.18)", "full": false, "lineColor": "rgb(31, 120, 193)", - "fillColor": "rgba(31, 118, 189, 0.18)" + "show": true }, - "gauge": { - "show": false, - "minValue": 0, - "maxValue": 100, - "thresholdMarkers": true, - "thresholdLabels": false - } - }, - { - "title": "Memory", - "error": false, - "span": 4, - "editable": true, - "type": "singlestat", - "isNew": true, - "id": 9, "targets": [ { - "refId": "A", "expr": "sum(container_memory_usage_bytes{namespace=\"$deployment_namespace\",pod_name=~\"$deployment_name.*\"}) / 1024^3", "intervalFactor": 2, - "step": 600 + "refId": "A", + "step": 60 } ], - "links": [], - "datasource": "${DS_PROMETHEUS}", - "maxDataPoints": 100, - "interval": null, - "cacheTimeout": null, - "format": "none", - "prefix": "", - "postfix": "GB", - "nullText": null, + "thresholds": "", + "title": "Memory", + "type": "singlestat", + "valueFontSize": "110%", "valueMaps": [ { - "value": "null", "op": "=", - "text": "N/A" + "text": "N/A", + "value": "null" } ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "format": "Bps", + "gauge": { + "maxValue": 
100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": false + }, + "id": 7, + "interval": null, + "links": [], + "mappingType": 1, "mappingTypes": [ { "name": "value to text", @@ -169,73 +236,85 @@ "value": 2 } ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", "rangeMaps": [ { "from": "null", - "to": "null", - "text": "N/A" + "text": "N/A", + "to": "null" } ], - "mappingType": 1, - "nullPointMode": "connected", - "valueName": "avg", - "prefixFontSize": "80%", - "valueFontSize": "110%", - "postfixFontSize": "50%", - "thresholds": "", - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], + "span": 4, "sparkline": { - "show": true, + "fillColor": "rgba(31, 118, 189, 0.18)", "full": false, "lineColor": "rgb(31, 120, 193)", - "fillColor": "rgba(31, 118, 189, 0.18)" + "show": true }, - "gauge": { - "show": false, - "minValue": 0, - "maxValue": 100, - "thresholdMarkers": true, - "thresholdLabels": false - } - }, - { - "title": "Network", - "error": false, - "span": 4, - "editable": true, - "type": "singlestat", - "isNew": true, - "id": 7, "targets": [ { - "refId": "A", "expr": "sum(rate(container_network_transmit_bytes_total{namespace=\"$deployment_namespace\",pod_name=~\"$deployment_name.*\"}[3m])) + sum(rate(container_network_receive_bytes_total{namespace=\"$deployment_namespace\",pod_name=~\"$deployment_name.*\"}[3m])) ", "intervalFactor": 2, - "step": 600 + "refId": "A", + "step": 60 } ], - "links": [], - "datasource": "${DS_PROMETHEUS}", - "maxDataPoints": 100, - "interval": null, - "cacheTimeout": null, - "format": "Bps", - "prefix": "", - "postfix": "", - "nullText": null, + "thresholds": "", + "title": "Network", + "type": "singlestat", + "valueFontSize": "80%", "valueMaps": [ { - "value": "null", "op": "=", - 
"text": "N/A" + "text": "N/A", + "value": "null" } ], + "valueName": "avg" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Row", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "100px", + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_PROMETHEUS}", + "decimals": null, + "editable": true, + "error": false, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": false + }, + "id": 5, + "interval": null, + "links": [], + "mappingType": 1, "mappingTypes": [ { "name": "value to text", @@ -246,108 +325,51 @@ "value": 2 } ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", "rangeMaps": [ { "from": "null", - "to": "null", - "text": "N/A" + "text": "N/A", + "to": "null" } ], - "mappingType": 1, - "nullPointMode": "connected", - "valueName": "avg", - "prefixFontSize": "50%", - "valueFontSize": "80%", - "postfixFontSize": "50%", - "thresholds": "", - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], + "span": 3, "sparkline": { - "show": true, + "fillColor": "rgba(31, 118, 189, 0.18)", "full": false, "lineColor": "rgb(31, 120, 193)", - "fillColor": "rgba(31, 118, 189, 0.18)" + "show": false }, - "gauge": { - "show": false, - "minValue": 0, - "maxValue": 100, - "thresholdMarkers": false, - "thresholdLabels": false - } - } - ], - "title": "Row", - "showTitle": false - }, - { - "title": "New row", - "height": "100px", - "editable": true, - "collapse": false, - "panels": [ - { - "title": "Desired Replicas", - "error": false, - "span": 3, - 
"editable": true, - "type": "singlestat", - "isNew": true, - "id": 5, "targets": [ { - "refId": "A", "expr": "kube_deployment_spec_replicas{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}", "intervalFactor": 2, - "step": 600, - "metric": "kube_deployment_spec_replicas" + "metric": "kube_deployment_spec_replicas", + "refId": "A", + "step": 60 } ], - "links": [], - "datasource": "${DS_PROMETHEUS}", - "maxDataPoints": 100, - "interval": null, - "cacheTimeout": null, - "format": "none", - "prefix": "", - "postfix": "", - "nullText": null, + "thresholds": "", + "title": "Desired Replicas", + "type": "singlestat", + "valueFontSize": "80%", "valueMaps": [ { - "value": "null", "op": "=", - "text": "N/A" - } - ], - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "rangeMaps": [ - { - "from": "null", - "to": "null", - "text": "N/A" + "text": "N/A", + "value": "null" } ], - "mappingType": 1, - "nullPointMode": "connected", - "valueName": "avg", - "prefixFontSize": "50%", - "valueFontSize": "80%", - "postfixFontSize": "50%", - "thresholds": "", + "valueName": "avg" + }, + { + "cacheTimeout": null, "colorBackground": false, "colorValue": false, "colors": [ @@ -355,53 +377,21 @@ "rgba(237, 129, 40, 0.89)", "rgba(50, 172, 45, 0.97)" ], - "sparkline": { - "show": false, - "full": false, - "lineColor": "rgb(31, 120, 193)", - "fillColor": "rgba(31, 118, 189, 0.18)" - }, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "format": "none", "gauge": { - "show": false, - "minValue": 0, "maxValue": 100, - "thresholdMarkers": false, - "thresholdLabels": false + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true }, - "decimals": null - }, - { - "title": "Available Replicas", - "error": false, - "span": 3, - "editable": true, - "type": "singlestat", - "isNew": true, "id": 6, - "targets": [ - { - "refId": "A", - "expr": 
"kube_deployment_status_replicas_available{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}", - "intervalFactor": 2, - "step": 600 - } - ], - "links": [], - "datasource": "${DS_PROMETHEUS}", - "maxDataPoints": 100, "interval": null, - "cacheTimeout": null, - "format": "none", - "prefix": "", - "postfix": "", - "nullText": null, - "valueMaps": [ - { - "value": "null", - "op": "=", - "text": "N/A" - } - ], + "links": [], + "mappingType": 1, "mappingTypes": [ { "name": "value to text", @@ -412,40 +402,47 @@ "value": 2 } ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", "rangeMaps": [ { "from": "null", - "to": "null", - "text": "N/A" + "text": "N/A", + "to": "null" } ], - "mappingType": 1, - "nullPointMode": "connected", - "valueName": "avg", - "prefixFontSize": "50%", - "valueFontSize": "80%", - "postfixFontSize": "50%", - "thresholds": "", - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], + "span": 3, "sparkline": { - "show": false, + "fillColor": "rgba(31, 118, 189, 0.18)", "full": false, "lineColor": "rgb(31, 120, 193)", - "fillColor": "rgba(31, 118, 189, 0.18)" + "show": false }, - "gauge": { - "show": false, - "minValue": 0, - "maxValue": 100, - "thresholdMarkers": true, - "thresholdLabels": false - } + "targets": [ + { + "expr": "kube_deployment_status_replicas_available{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}", + "intervalFactor": 2, + "refId": "A", + "step": 60 + } + ], + "thresholds": "", + "title": "Available Replicas", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" }, { "cacheTimeout": null, @@ -469,7 +466,6 @@ }, "id": 3, "interval": null, - "isNew": true, "links": [], "mappingType": 1, 
"mappingTypes": [ @@ -509,7 +505,7 @@ "intervalFactor": 2, "legendFormat": "", "refId": "A", - "step": 600 + "step": 60 } ], "thresholds": "", @@ -547,7 +543,6 @@ }, "id": 2, "interval": null, - "isNew": true, "links": [], "mappingType": 1, "mappingTypes": [ @@ -587,7 +582,7 @@ "intervalFactor": 2, "legendFormat": "", "refId": "A", - "step": 600 + "step": 60 } ], "thresholds": "", @@ -603,11 +598,16 @@ ], "valueName": "avg" } - ] + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "New row", + "titleSize": "h6" }, { "collapse": false, - "editable": true, "height": "350px", "panels": [ { @@ -617,23 +617,17 @@ "editable": true, "error": false, "fill": 1, - "grid": { - "threshold1": null, - "threshold1Color": "rgba(216, 200, 27, 0.27)", - "threshold2": null, - "threshold2Color": "rgba(234, 112, 112, 0.22)" - }, + "grid": {}, "id": 1, - "isNew": true, "legend": { "avg": false, "current": false, + "hideZero": false, "max": false, "min": false, "show": true, "total": false, - "values": false, - "hideZero": false + "values": false }, "lines": true, "linewidth": 2, @@ -653,35 +647,35 @@ "intervalFactor": 2, "legendFormat": "current replicas", "refId": "A", - "step": 30 + "step": 4 }, { "expr": "kube_deployment_status_replicas_available{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}", "intervalFactor": 2, "legendFormat": "available", "refId": "B", - "step": 30 + "step": 4 }, { "expr": "kube_deployment_status_replicas_unavailable{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}", "intervalFactor": 2, "legendFormat": "unavailable", "refId": "C", - "step": 30 + "step": 4 }, { "expr": "kube_deployment_status_replicas_updated{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}", "intervalFactor": 2, "legendFormat": "updated", "refId": "D", - "step": 30 + "step": 4 }, { "expr": 
"kube_deployment_spec_replicas{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}", "intervalFactor": 2, "legendFormat": "desired", "refId": "E", - "step": 30 + "step": 4 } ], "thresholds": [], @@ -694,6 +688,7 @@ "sort": 0, "value_type": "cumulative" }, + "transparent": false, "type": "graph", "xaxis": { "mode": "time", @@ -718,43 +713,20 @@ "min": null, "show": false } - ], - "transparent": false + ] } ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, "title": "New row", - "showTitle": false + "titleSize": "h6" } ], - "time": { - "from": "now-6h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, + "schemaVersion": 14, + "style": "dark", + "tags": [], "templating": { "list": [ { @@ -772,6 +744,7 @@ "regex": "", "sort": 0, "tagValuesQuery": null, + "tags": [], "tagsQuery": "", "type": "query", "useTags": false @@ -791,27 +764,43 @@ "regex": "", "sort": 0, "tagValuesQuery": "", + "tags": [], "tagsQuery": "deployment", "type": "query", "useTags": false } ] }, - "annotations": { - "list": [] + "time": { + "from": "now-1h", + "to": "now" }, - "schemaVersion": 12, - "version": 2, - "links": [], - "gnetId": null -}, - "inputs": [ - { - "name": "DS_PROMETHEUS", - "pluginId": "prometheus", - "type": "datasource", - "value": "prometheus" - } + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" ], - "overwrite": true + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Deployments", + "version": 1 } diff --git a/kube-prometheus/grafana/kubernetes-pods-dashboard.json b/kube-prometheus/grafana/kubernetes-pods-dashboard.json index 035da015..ecf2f499 100644 
--- a/kube-prometheus/grafana/kubernetes-pods-dashboard.json +++ b/kube-prometheus/grafana/kubernetes-pods-dashboard.json @@ -1,409 +1,421 @@ { - "dashboard": { - "__inputs": [ - { - "description": "", - "label": "prometheus", - "name": "DS_PROMETHEUS", - "pluginId": "prometheus", - "pluginName": "Prometheus", - "type": "datasource" - } - ], - "__requires": [ + "__inputs": [ + { + "name": "DS_PROMETHEUS", + "label": "prometheus", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "4.1.1" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + } + ], + "annotations": { + "list": [] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 1, + "hideControls": false, + "id": null, + "links": [], + "refresh": "30s", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 1, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ { - "id": "graph", - "name": "Graph", - "type": "panel", - "version": "" + "expr": "sum by(container_name) (container_memory_usage_bytes{pod_name=\"$pod\", container_name=~\"$container\", container_name!=\"POD\"})", + "interval": "10s", + "intervalFactor": 1, + "legendFormat": "Current: {{ container_name }}", + "metric": 
"container_memory_usage_bytes", + "refId": "A", + "step": 10 }, { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "3.1.1" + "expr": "kube_pod_container_requested_memory_bytes{pod=\"$pod\", container=~\"$container\"}", + "interval": "10s", + "intervalFactor": 2, + "legendFormat": "Requested: {{ container }}", + "metric": "kube_pod_container_requested_memory_bytes", + "refId": "B", + "step": 20 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Memory Usage", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true }, { - "id": "prometheus", - "name": "Prometheus", - "type": "datasource", - "version": "1.0.0" + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true } - ], - "annotations": { - "list": [] - }, - "editable": true, - "gnetId": null, - "hideControls": false, - "id": null, - "links": [], - "rows": [ + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Row", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 2, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ { - 
"collapse": false, - "editable": true, - "height": "250px", - "panels": [ - { - "aliasColors": {}, - "bars": false, - "datasource": "${DS_PROMETHEUS}", - "editable": true, - "error": false, - "fill": 1, - "grid": { - "threshold1": null, - "threshold1Color": "rgba(216, 200, 27, 0.27)", - "threshold2": null, - "threshold2Color": "rgba(234, 112, 112, 0.22)" - }, - "id": 1, - "isNew": true, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "span": 12, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum by(container_name) (container_memory_usage_bytes{pod_name=\"$pod\", container_name=~\"$container\", container_name!=\"POD\"})", - "interval": "10s", - "intervalFactor": 1, - "legendFormat": "Current: {{ container_name }}", - "metric": "container_memory_usage_bytes", - "refId": "A", - "step": 10 - }, - { - "expr": "kube_pod_container_requested_memory_bytes{pod=\"$pod\", container=~\"$container\"}", - "interval": "10s", - "intervalFactor": 2, - "legendFormat": "Requested: {{ container }}", - "metric": "kube_pod_container_requested_memory_bytes", - "refId": "B", - "step": 20 - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Memory Usage", - "tooltip": { - "msResolution": true, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "show": true - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - } - ], - "title": "Row" - }, + "expr": "sum by (container_name)( 
rate(container_cpu_usage_seconds_total{image!=\"\",container_name!=\"POD\",pod_name=\"$pod\"}[1m] ) )", + "intervalFactor": 2, + "legendFormat": "{{ container_name }}", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "CPU Usage", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ { - "collapse": false, - "editable": true, - "height": "250px", - "panels": [ - { - "aliasColors": {}, - "bars": false, - "datasource": "${DS_PROMETHEUS}", - "editable": true, - "error": false, - "fill": 1, - "grid": { - "threshold1": null, - "threshold1Color": "rgba(216, 200, 27, 0.27)", - "threshold2": null, - "threshold2Color": "rgba(234, 112, 112, 0.22)" - }, - "id": 2, - "isNew": true, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "span": 12, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum by (container_name)( rate(container_cpu_usage_seconds_total{image!=\"\",container_name!=\"POD\",pod_name=\"$pod\"}[1m] ) )", - "intervalFactor": 2, - "legendFormat": "{{ container_name }}", - "refId": "A", - "step": 30 - } - ], - "timeFrom": null, - "timeShift": null, - "title": "CPU Usage", - "tooltip": { - "msResolution": true, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "show": true - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - 
"max": null, - "min": null, - "show": true - } - ] - } - ], - "title": "New row" + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true }, { - "collapse": false, - "editable": true, - "height": "250px", - "panels": [ - { - "aliasColors": {}, - "bars": false, - "datasource": "${DS_PROMETHEUS}", - "editable": true, - "error": false, - "fill": 1, - "grid": { - "threshold1": null, - "threshold1Color": "rgba(216, 200, 27, 0.27)", - "threshold2": null, - "threshold2Color": "rgba(234, 112, 112, 0.22)" - }, - "id": 3, - "isNew": true, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "span": 12, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sort_desc(sum by (pod_name) (rate (container_network_receive_bytes_total{pod_name=\"$pod\"}[1m]) ))", - "intervalFactor": 2, - "legendFormat": "{{ pod_name }}", - "refId": "A", - "step": 30 - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Network I/O", - "tooltip": { - "msResolution": true, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "show": true - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - } - ], - "title": "New row" + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true } - ], - "schemaVersion": 12, - "sharedCrosshair": true, - "style": "dark", - "tags": [], - "templating": { - "list": [ - { - "allValue": ".*", - "current": {}, - "datasource": 
"${DS_PROMETHEUS}", - "hide": 0, - "includeAll": true, - "label": "Namespace", - "multi": false, - "name": "namespace", - "options": [], - "query": "label_values(kube_pod_info, namespace)", - "refresh": 1, - "regex": "", - "type": "query" - }, - { - "current": {}, - "datasource": "${DS_PROMETHEUS}", - "hide": 0, - "includeAll": false, - "label": "Pod", - "multi": false, - "name": "pod", - "options": [], - "query": "label_values(kube_pod_info{namespace=~\"$namespace\"}, pod)", - "refresh": 1, - "regex": "", - "type": "query" - }, - { - "allValue": ".*", - "current": {}, - "datasource": "${DS_PROMETHEUS}", - "hide": 0, - "includeAll": true, - "label": "Container", - "multi": false, - "name": "container", - "options": [], - "query": "label_values(kube_pod_container_info{namespace=\"$namespace\", pod=\"$pod\"}, container)", - "refresh": 1, - "regex": "", - "type": "query" - } - ] - }, - "time": { - "from": "now-6h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "browser", - "title": "Pods", - "version": 26 + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "New row", + "titleSize": "h6" }, - "inputs": [ + { + "collapse": false, + "height": "250px", + "panels": [ { - "name": "DS_PROMETHEUS", - "pluginId": "prometheus", - "type": "datasource", - "value": "prometheus" + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 3, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": 
false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum by (pod_name) (rate (container_network_receive_bytes_total{pod_name=\"$pod\"}[1m]) ))", + "intervalFactor": 2, + "legendFormat": "{{ pod_name }}", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Network I/O", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "New row", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "allValue": ".*", + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "hide": 0, + "includeAll": true, + "label": "Namespace", + "multi": false, + "name": "namespace", + "options": [], + "query": "label_values(kube_pod_info, namespace)", + "refresh": 1, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "hide": 0, + "includeAll": false, + "label": "Pod", + "multi": false, + "name": "pod", + "options": [], + "query": "label_values(kube_pod_info{namespace=~\"$namespace\"}, pod)", + "refresh": 1, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".*", + "current": {}, + "datasource": 
"${DS_PROMETHEUS}", + "hide": 0, + "includeAll": true, + "label": "Container", + "multi": false, + "name": "container", + "options": [], + "query": "label_values(kube_pod_container_info{namespace=\"$namespace\", pod=\"$pod\"}, container)", + "refresh": 1, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" ], - "overwrite": true + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Pods", + "version": 2 } diff --git a/kube-prometheus/grafana/node-dashboard.json b/kube-prometheus/grafana/node-dashboard.json index 6f90c9fd..cabff34e 100644 --- a/kube-prometheus/grafana/node-dashboard.json +++ b/kube-prometheus/grafana/node-dashboard.json @@ -1,5 +1,4 @@ { - "dashboard": { "__inputs": [ { "name": "DS_PROMETHEUS", @@ -12,56 +11,47 @@ ], "__requires": [ { - "type": "panel", - "id": "graph", - "name": "Graph", - "version": "" + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "4.1.1" }, { "type": "panel", - "id": "singlestat", - "name": "Singlestat", + "id": "graph", + "name": "Graph", "version": "" }, - { - "type": "grafana", - "id": "grafana", - "name": "Grafana", - "version": "3.1.1" - }, { "type": "datasource", "id": "prometheus", "name": "Prometheus", "version": "1.0.0" + }, + { + "type": "panel", + "id": "singlestat", + "name": "Singlestat", + "version": "" } ], - "id": null, - "title": "Nodes", + "annotations": { + "list": [] + }, "description": "Dashboard to get an overview of one server", - "tags": [ - "prometheus" - ], - "style": "dark", - "timezone": "browser", "editable": true, + "gnetId": 22, + "graphTooltip": 0, "hideControls": false, - "sharedCrosshair": false, + "id": null, + "links": [], + 
"refresh": "30s", "rows": [ { "collapse": false, - "editable": true, "height": "250px", "panels": [ { - "alert": { - "crit": { - "op": ">" - }, - "warn": { - "op": ">" - } - }, "alerting": {}, "aliasColors": {}, "bars": false, @@ -69,14 +59,8 @@ "editable": true, "error": false, "fill": 1, - "grid": { - "threshold1": null, - "threshold2": null, - "threshold1Color": "rgba(216, 200, 27, 0.27)", - "threshold2Color": "rgba(234, 112, 112, 0.22)" - }, + "grid": {}, "id": 3, - "isNew": true, "legend": { "avg": false, "current": false, @@ -120,7 +104,10 @@ }, "type": "graph", "xaxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "yaxes": [ { @@ -142,14 +129,6 @@ ] }, { - "alert": { - "crit": { - "op": ">" - }, - "warn": { - "op": ">" - } - }, "alerting": {}, "aliasColors": {}, "bars": false, @@ -157,14 +136,8 @@ "editable": true, "error": false, "fill": 1, - "grid": { - "threshold1": null, - "threshold2": null, - "threshold1Color": "rgba(216, 200, 27, 0.27)", - "threshold2Color": "rgba(234, 112, 112, 0.22)" - }, + "grid": {}, "id": 9, - "isNew": true, "legend": { "avg": false, "current": false, @@ -224,7 +197,10 @@ }, "type": "graph", "xaxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "yaxes": [ { @@ -246,22 +222,18 @@ ] } ], - "title": "New row" + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "New row", + "titleSize": "h6" }, { "collapse": false, - "editable": true, "height": "250px", "panels": [ { - "alert": { - "crit": { - "op": ">" - }, - "warn": { - "op": ">" - } - }, "alerting": {}, "aliasColors": {}, "bars": false, @@ -269,14 +241,8 @@ "editable": true, "error": false, "fill": 1, - "grid": { - "threshold1": null, - "threshold2": null, - "threshold1Color": "rgba(216, 200, 27, 0.27)", - "threshold2Color": "rgba(234, 112, 112, 0.22)" - }, + "grid": {}, "id": 4, - "isNew": true, "legend": { "avg": false, "current": false, @@ -326,7 
+292,10 @@ }, "type": "graph", "xaxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "yaxes": [ { @@ -369,7 +338,6 @@ }, "id": 5, "interval": null, - "isNew": true, "links": [], "mappingType": 1, "mappingTypes": [ @@ -426,22 +394,18 @@ "valueName": "avg" } ], - "title": "New row" + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "New row", + "titleSize": "h6" }, { "collapse": false, - "editable": true, "height": "250px", "panels": [ { - "alert": { - "crit": { - "op": ">" - }, - "warn": { - "op": ">" - } - }, "alerting": {}, "aliasColors": {}, "bars": false, @@ -449,14 +413,8 @@ "editable": true, "error": false, "fill": 1, - "grid": { - "threshold1": null, - "threshold2": null, - "threshold1Color": "rgba(216, 200, 27, 0.27)", - "threshold2Color": "rgba(234, 112, 112, 0.22)" - }, + "grid": {}, "id": 6, - "isNew": true, "legend": { "avg": false, "current": false, @@ -528,7 +486,10 @@ }, "type": "graph", "xaxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "yaxes": [ { @@ -571,7 +532,6 @@ }, "id": 7, "interval": null, - "isNew": true, "links": [], "mappingType": 1, "mappingTypes": [ @@ -628,22 +588,18 @@ "valueName": "current" } ], - "title": "New row" + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "New row", + "titleSize": "h6" }, { "collapse": false, - "editable": true, "height": "250px", "panels": [ { - "alert": { - "crit": { - "op": ">" - }, - "warn": { - "op": ">" - } - }, "alerting": {}, "aliasColors": {}, "bars": false, @@ -651,14 +607,8 @@ "editable": true, "error": false, "fill": 1, - "grid": { - "threshold1": null, - "threshold2": null, - "threshold1Color": "rgba(216, 200, 27, 0.27)", - "threshold2Color": "rgba(234, 112, 112, 0.22)" - }, + "grid": {}, "id": 8, - "isNew": true, "legend": { "avg": false, "current": false, @@ -708,7 +658,10 @@ }, "type": "graph", "xaxis": { - 
"show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "yaxes": [ { @@ -730,14 +683,6 @@ ] }, { - "alert": { - "crit": { - "op": ">" - }, - "warn": { - "op": ">" - } - }, "alerting": {}, "aliasColors": {}, "bars": false, @@ -745,14 +690,8 @@ "editable": true, "error": false, "fill": 1, - "grid": { - "threshold1": null, - "threshold2": null, - "threshold1Color": "rgba(216, 200, 27, 0.27)", - "threshold2Color": "rgba(234, 112, 112, 0.22)" - }, + "grid": {}, "id": 10, - "isNew": true, "legend": { "avg": false, "current": false, @@ -802,7 +741,10 @@ }, "type": "graph", "xaxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "yaxes": [ { @@ -824,9 +766,43 @@ ] } ], - "title": "New row" + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "New row", + "titleSize": "h6" } ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "prometheus" + ], + "templating": { + "list": [ + { + "allValue": null, + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "server", + "options": [], + "query": "label_values(node_boot_time, instance)", + "refresh": 1, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, "time": { "from": "now-1h", "to": "now" @@ -856,38 +832,7 @@ "30d" ] }, - "templating": { - "list": [ - { - "current": {}, - "datasource": "${DS_PROMETHEUS}", - "hide": 0, - "includeAll": false, - "multi": false, - "name": "server", - "options": [], - "query": "label_values(node_boot_time, instance)", - "refresh": 1, - "type": "query" - } - ] - }, - "annotations": { - "list": [] - }, - "refresh": false, - "schemaVersion": 12, - "version": 1, - "links": [], - "gnetId": 22 -}, - "inputs": [ - { - "name": "DS_PROMETHEUS", - "pluginId": "prometheus", - "type": "datasource", - "value": "prometheus" - } - ], - 
"overwrite": true + "timezone": "browser", + "title": "Nodes", + "version": 1 } From d0ac1a9c4b231ccde208d8e0823fca8ae8294e1a Mon Sep 17 00:00:00 2001 From: Michael Goodness Date: Thu, 26 Jan 2017 09:57:52 -0600 Subject: [PATCH 08/41] grafana: added inputs section to dashboards --- .../grafana/all-nodes-dashboard.json | 1597 ++++++++-------- .../grafana/deployments-dashboard.json | 1573 ++++++++-------- .../grafana/kubernetes-pods-dashboard.json | 827 +++++---- kube-prometheus/grafana/node-dashboard.json | 1635 +++++++++-------- kube-prometheus/grafana/prometheus-stats.json | 1453 +++++++-------- 5 files changed, 3570 insertions(+), 3515 deletions(-) diff --git a/kube-prometheus/grafana/all-nodes-dashboard.json b/kube-prometheus/grafana/all-nodes-dashboard.json index 21eaadf3..618c883c 100644 --- a/kube-prometheus/grafana/all-nodes-dashboard.json +++ b/kube-prometheus/grafana/all-nodes-dashboard.json @@ -1,817 +1,828 @@ { - "__inputs": [ - { - "name": "DS_PROMETHEUS", - "label": "prometheus", - "description": "", - "type": "datasource", - "pluginId": "prometheus", - "pluginName": "Prometheus" - } - ], - "__requires": [ - { - "type": "grafana", - "id": "grafana", - "name": "Grafana", - "version": "4.1.1" - }, - { - "type": "panel", - "id": "graph", - "name": "Graph", - "version": "" - }, - { - "type": "datasource", - "id": "prometheus", - "name": "Prometheus", - "version": "1.0.0" + "dashboard": { + "__inputs": [ + { + "name": "DS_PROMETHEUS", + "label": "prometheus", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "4.1.1" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + }, + { + "type": "panel", + "id": "singlestat", + "name": "Singlestat", + "version": "" + } + ], + 
"annotations": { + "list": [] }, - { - "type": "panel", - "id": "singlestat", - "name": "Singlestat", - "version": "" - } - ], - "annotations": { - "list": [] - }, - "description": "Dashboard to get an overview of one server", - "editable": true, - "gnetId": 22, - "graphTooltip": 0, - "hideControls": false, - "id": null, - "links": [], - "refresh": "30s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "alerting": {}, - "aliasColors": {}, - "bars": false, - "datasource": "${DS_PROMETHEUS}", - "editable": true, - "error": false, - "fill": 1, - "grid": {}, - "id": 3, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(node_cpu{mode=\"idle\"}[2m])) * 100", - "hide": false, - "intervalFactor": 10, - "legendFormat": "", - "refId": "A", - "step": 50 - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Idle cpu", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "percent", - "label": "cpu usage", - "logBase": 1, - "max": null, - "min": 0, - "show": true + "description": "Overview of all nodes", + "editable": true, + "gnetId": 22, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [], + "refresh": "30s", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "alerting": {}, + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 3, + 
"legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "alerting": {}, - "aliasColors": {}, - "bars": false, - "datasource": "${DS_PROMETHEUS}", - "editable": true, - "error": false, - "fill": 1, - "grid": {}, - "id": 9, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(node_load1)", - "intervalFactor": 4, - "legendFormat": "load 1m", - "refId": "A", - "step": 20, - "target": "" + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(node_cpu{mode=\"idle\"}[2m])) * 100", + "hide": false, + "intervalFactor": 10, + "legendFormat": "", + "refId": "A", + "step": 50 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Idle cpu", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" }, - { - "expr": "sum(node_load5)", - "intervalFactor": 4, - "legendFormat": "load 5m", - "refId": "B", - "step": 20, - "target": "" + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] }, - { - "expr": "sum(node_load15)", - "intervalFactor": 4, - "legendFormat": "load 15m", - "refId": "C", - "step": 20, - "target": "" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - 
"title": "System load", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "cumulative" + "yaxes": [ + { + "format": "percent", + "label": "cpu usage", + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] }, - "type": "graph", - "xaxis": { - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "percentunit", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + { + "alerting": {}, + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 9, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "New row", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "alerting": {}, - "aliasColors": {}, - "bars": false, - "datasource": "${DS_PROMETHEUS}", - "editable": true, - "error": false, - "fill": 1, - "grid": {}, - "id": 4, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "node_memory_SwapFree{instance=\"172.17.0.1:9100\",job=\"prometheus\"}", - "yaxis": 2 - } - ], - "span": 9, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(node_memory_MemTotal) - sum(node_memory_MemFree)", - "intervalFactor": 2, - 
"legendFormat": "free memory", - "metric": "memo", - "refId": "A", - "step": 4, - "target": "" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Free memory", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(node_load1)", + "intervalFactor": 4, + "legendFormat": "load 1m", + "refId": "A", + "step": 20, + "target": "" + }, + { + "expr": "sum(node_load5)", + "intervalFactor": 4, + "legendFormat": "load 5m", + "refId": "B", + "step": 20, + "target": "" + }, + { + "expr": "sum(node_load15)", + "intervalFactor": 4, + "legendFormat": "load 15m", + "refId": "C", + "step": 20, + "target": "" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "System load", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": "${DS_PROMETHEUS}", - "editable": true, - "error": false, - "format": "percent", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": true, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "id": 5, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": 
"value to text", - "value": 1 + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 3, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "targets": [ - { - "expr": "(sum(node_memory_MemFree) / sum(node_memory_MemTotal)) * 100", - "intervalFactor": 2, - "refId": "A", - "step": 60, - "target": "" - } - ], - "thresholds": "10, 20", - "title": "Free memory", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "avg" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "New row", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "alerting": {}, - "aliasColors": {}, - "bars": false, - "datasource": "${DS_PROMETHEUS}", - "editable": true, - "error": false, - "fill": 1, - "grid": {}, - "id": 6, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "New row", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "alerting": {}, + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS}", + 
"editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "node_memory_SwapFree{instance=\"172.17.0.1:9100\",job=\"prometheus\"}", + "yaxis": 2 + } + ], + "span": 9, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(node_memory_MemTotal) - sum(node_memory_MemFree)", + "intervalFactor": 2, + "legendFormat": "free memory", + "metric": "memo", + "refId": "A", + "step": 4, + "target": "" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Free memory", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "read", - "yaxis": 1 + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true }, - { - "alias": "{instance=\"172.17.0.1:9100\"}", - "yaxis": 
2 + "id": 5, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 3, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false }, - { - "alias": "io time", - "yaxis": 2 - } - ], - "span": 9, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(irate(node_disk_bytes_read[5m]))", - "hide": false, - "intervalFactor": 4, - "legendFormat": "read", - "refId": "A", - "step": 8, - "target": "" + "targets": [ + { + "expr": "(sum(node_memory_MemFree) / sum(node_memory_MemTotal)) * 100", + "intervalFactor": 2, + "refId": "A", + "step": 60, + "target": "" + } + ], + "thresholds": "10, 20", + "title": "Free memory", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "New row", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "alerting": {}, + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false }, - { - "expr": "sum(irate(node_disk_bytes_written[5m]))", - "intervalFactor": 4, - "legendFormat": "written", - "refId": "B", - "step": 8 + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + 
"pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "read", + "yaxis": 1 + }, + { + "alias": "{instance=\"172.17.0.1:9100\"}", + "yaxis": 2 + }, + { + "alias": "io time", + "yaxis": 2 + } + ], + "span": 9, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(node_disk_bytes_read[5m]))", + "hide": false, + "intervalFactor": 4, + "legendFormat": "read", + "refId": "A", + "step": 8, + "target": "" + }, + { + "expr": "sum(irate(node_disk_bytes_written[5m]))", + "intervalFactor": 4, + "legendFormat": "written", + "refId": "B", + "step": 8 + }, + { + "expr": "sum(irate(node_disk_io_time_ms[5m]))", + "intervalFactor": 4, + "legendFormat": "io time", + "refId": "C", + "step": 8 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Disk usage", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" }, - { - "expr": "sum(irate(node_disk_io_time_ms[5m]))", - "intervalFactor": 4, - "legendFormat": "io time", - "refId": "C", - "step": 8 - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Disk usage", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] }, - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": "${DS_PROMETHEUS}", - "editable": true, - "error": false, - "format": 
"percentunit", - "gauge": { - "maxValue": 1, - "minValue": 0, - "show": true, - "thresholdLabels": false, - "thresholdMarkers": true + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] }, - "id": 7, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "format": "percentunit", + "gauge": { + "maxValue": 1, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 3, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "targets": [ - { - "expr": "sum(node_filesystem_free{device!=\"rootfs\"}) / sum(node_filesystem_size{device!=\"rootfs\"})", - "intervalFactor": 2, - "refId": "A", - "step": 60, - "target": "" - } - ], - "thresholds": "0.10, 0.25", - "title": "Free disk space", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "New row", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "alerting": {}, - 
"aliasColors": {}, - "bars": false, - "datasource": "${DS_PROMETHEUS}", - "editable": true, - "error": false, - "fill": 1, - "grid": {}, - "id": 8, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "transmitted ", - "yaxis": 2 - } - ], - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(irate(node_network_receive_bytes{device!~\"lo\"}[5m]))", - "hide": false, - "intervalFactor": 2, - "legendFormat": "", - "refId": "A", - "step": 10, - "target": "" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Network received", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "id": 7, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 3, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false }, - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "alerting": {}, - "aliasColors": {}, - "bars": false, - "datasource": "${DS_PROMETHEUS}", - 
"editable": true, - "error": false, - "fill": 1, - "grid": {}, - "id": 10, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "transmitted ", - "yaxis": 2 - } - ], - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(irate(node_network_transmit_bytes{device!~\"lo\"}[5m]))", - "hide": false, - "intervalFactor": 2, - "legendFormat": "", - "refId": "B", - "step": 10, - "target": "" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Network transmitted", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "name": null, - "show": true, - "values": [] + "targets": [ + { + "expr": "sum(node_filesystem_free{device!=\"rootfs\"}) / sum(node_filesystem_size{device!=\"rootfs\"})", + "intervalFactor": 2, + "refId": "A", + "step": 60, + "target": "" + } + ], + "thresholds": "0.10, 0.25", + "title": "Free disk space", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "New row", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "alerting": {}, + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": 
[], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "transmitted ", + "yaxis": 2 + } + ], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(node_network_receive_bytes{device!~\"lo\"}[5m]))", + "hide": false, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10, + "target": "" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Network received", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + { + "alerting": {}, + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 10, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false }, - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - } + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "transmitted ", + "yaxis": 2 + } + ], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(node_network_transmit_bytes{device!~\"lo\"}[5m]))", + "hide": false, + "intervalFactor": 2, + "legendFormat": "", + "refId": 
"B", + "step": 10, + "target": "" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Network transmitted", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "New row", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "prometheus" + ], + "templating": { + "list": [] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "New row", - "titleSize": "h6" + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "All Nodes", + "version": 2 + }, + "inputs": [ + { + "name": "DS_PROMETHEUS", + "pluginId": "prometheus", + "type": "datasource", + "value": "prometheus" } ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "prometheus" - ], - "templating": { - "list": [] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "browser", - "title": "All Nodes", - "version": 2 + "overwrite": true } diff --git a/kube-prometheus/grafana/deployments-dashboard.json 
b/kube-prometheus/grafana/deployments-dashboard.json index 0d0a61a5..832a363b 100644 --- a/kube-prometheus/grafana/deployments-dashboard.json +++ b/kube-prometheus/grafana/deployments-dashboard.json @@ -1,806 +1,817 @@ { - "__inputs": [ - { - "name": "DS_PROMETHEUS", - "label": "prometheus", - "description": "", - "type": "datasource", - "pluginId": "prometheus", - "pluginName": "Prometheus" - } - ], - "__requires": [ - { - "type": "grafana", - "id": "grafana", - "name": "Grafana", - "version": "4.1.1" - }, - { - "type": "panel", - "id": "graph", - "name": "Graph", - "version": "" - }, - { - "type": "datasource", - "id": "prometheus", - "name": "Prometheus", - "version": "1.0.0" + "dashboard": { + "__inputs": [ + { + "name": "DS_PROMETHEUS", + "label": "prometheus", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "4.1.1" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + }, + { + "type": "panel", + "id": "singlestat", + "name": "Singlestat", + "version": "" + } + ], + "annotations": { + "list": [] }, - { - "type": "panel", - "id": "singlestat", - "name": "Singlestat", - "version": "" - } - ], - "annotations": { - "list": [] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "id": null, - "links": [], - "refresh": "30s", - "rows": [ - { - "collapse": false, - "height": "200px", - "panels": [ - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": "${DS_PROMETHEUS}", - "editable": true, - "error": false, - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - 
"thresholdLabels": false, - "thresholdMarkers": true - }, - "id": 8, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 + "editable": true, + "gnetId": null, + "graphTooltip": 1, + "hideControls": false, + "id": null, + "links": [], + "refresh": "30s", + "rows": [ + { + "collapse": false, + "height": "200px", + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "cores", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 4, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": true - }, - "targets": [ - { - "expr": "sum(rate(container_cpu_usage_seconds_total{namespace=\"$deployment_namespace\",pod_name=~\"$deployment_name.*\"}[3m])) ", - "intervalFactor": 2, - "refId": "A", - "step": 60 - } - ], - "thresholds": "", - "title": "CPU", - "type": "singlestat", - "valueFontSize": "110%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "avg" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": "${DS_PROMETHEUS}", - "editable": true, - "error": false, - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - 
"thresholdLabels": false, - "thresholdMarkers": true - }, - "id": 9, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 + "id": 8, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "cores", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 4, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "GB", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "80%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 4, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": true + "targets": [ + { + "expr": "sum(rate(container_cpu_usage_seconds_total{namespace=\"$deployment_namespace\",pod_name=~\"$deployment_name.*\"}[3m])) ", + "intervalFactor": 2, + "refId": "A", + "step": 60 + } + ], + "thresholds": "", + "title": "CPU", + "type": "singlestat", + "valueFontSize": "110%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" }, - "targets": [ - { - "expr": "sum(container_memory_usage_bytes{namespace=\"$deployment_namespace\",pod_name=~\"$deployment_name.*\"}) / 1024^3", - "intervalFactor": 2, - "refId": "A", - "step": 60 - } - ], - "thresholds": "", - "title": "Memory", - "type": "singlestat", - "valueFontSize": "110%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - 
"valueName": "avg" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": "${DS_PROMETHEUS}", - "editable": true, - "error": false, - "format": "Bps", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": false - }, - "id": 7, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 4, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": true - }, - "targets": [ - { - "expr": "sum(rate(container_network_transmit_bytes_total{namespace=\"$deployment_namespace\",pod_name=~\"$deployment_name.*\"}[3m])) + sum(rate(container_network_receive_bytes_total{namespace=\"$deployment_namespace\",pod_name=~\"$deployment_name.*\"}[3m])) ", - "intervalFactor": 2, - "refId": "A", - "step": 60 - } - ], - "thresholds": "", - "title": "Network", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "avg" - } - ], - "repeat": null, - "repeatIteration": null, - 
"repeatRowId": null, - "showTitle": false, - "title": "Row", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "100px", - "panels": [ - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": "${DS_PROMETHEUS}", - "decimals": null, - "editable": true, - "error": false, - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": false - }, - "id": 5, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 + "id": 9, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "GB", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "80%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 4, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 3, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "targets": [ - { - "expr": "kube_deployment_spec_replicas{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}", - "intervalFactor": 2, - "metric": "kube_deployment_spec_replicas", - "refId": "A", - "step": 60 - } - ], - "thresholds": "", - "title": "Desired 
Replicas", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "avg" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": "${DS_PROMETHEUS}", - "editable": true, - "error": false, - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true + "targets": [ + { + "expr": "sum(container_memory_usage_bytes{namespace=\"$deployment_namespace\",pod_name=~\"$deployment_name.*\"}) / 1024^3", + "intervalFactor": 2, + "refId": "A", + "step": 60 + } + ], + "thresholds": "", + "title": "Memory", + "type": "singlestat", + "valueFontSize": "110%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" }, - "id": 6, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "format": "Bps", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": false }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 3, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "targets": [ - { - "expr": 
"kube_deployment_status_replicas_available{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}", - "intervalFactor": 2, - "refId": "A", - "step": 60 - } - ], - "thresholds": "", - "title": "Available Replicas", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "avg" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": "${DS_PROMETHEUS}", - "editable": true, - "error": false, - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "id": 3, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 + "id": 7, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 4, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 3, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "targets": [ - { - "expr": 
"kube_deployment_status_observed_generation{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}", - "intervalFactor": 2, - "legendFormat": "", - "refId": "A", - "step": 60 - } - ], - "thresholds": "", - "title": "Observed Generation", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "avg" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": "${DS_PROMETHEUS}", - "editable": true, - "error": false, - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "id": 2, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 + "targets": [ + { + "expr": "sum(rate(container_network_transmit_bytes_total{namespace=\"$deployment_namespace\",pod_name=~\"$deployment_name.*\"}[3m])) + sum(rate(container_network_receive_bytes_total{namespace=\"$deployment_namespace\",pod_name=~\"$deployment_name.*\"}[3m])) ", + "intervalFactor": 2, + "refId": "A", + "step": 60 + } + ], + "thresholds": "", + "title": "Network", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Row", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "100px", + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_PROMETHEUS}", + "decimals": null, + "editable": true, + "error": false, + "format": "none", + "gauge": { + 
"maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": false }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 3, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "targets": [ - { - "expr": "kube_deployment_metadata_generation{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}", - "intervalFactor": 2, - "legendFormat": "", - "refId": "A", - "step": 60 - } - ], - "thresholds": "", - "title": "Metadata Generation", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "avg" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "New row", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "350px", - "panels": [ - { - "aliasColors": {}, - "bars": false, - "datasource": "${DS_PROMETHEUS}", - "editable": true, - "error": false, - "fill": 1, - "grid": {}, - "id": 1, - "legend": { - "avg": false, - "current": false, - "hideZero": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false + "id": 5, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 3, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + 
"full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "expr": "kube_deployment_spec_replicas{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}", + "intervalFactor": 2, + "metric": "kube_deployment_spec_replicas", + "refId": "A", + "step": 60 + } + ], + "thresholds": "", + "title": "Desired Replicas", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "span": 12, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "kube_deployment_status_replicas{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}", - "intervalFactor": 2, - "legendFormat": "current replicas", - "refId": "A", - "step": 4 + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true }, - { - "expr": "kube_deployment_status_replicas_available{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}", - "intervalFactor": 2, - "legendFormat": "available", - "refId": "B", - "step": 4 + "id": 6, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": 
"N/A", + "to": "null" + } + ], + "span": 3, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false }, - { - "expr": "kube_deployment_status_replicas_unavailable{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}", - "intervalFactor": 2, - "legendFormat": "unavailable", - "refId": "C", - "step": 4 + "targets": [ + { + "expr": "kube_deployment_status_replicas_available{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}", + "intervalFactor": 2, + "refId": "A", + "step": 60 + } + ], + "thresholds": "", + "title": "Available Replicas", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true }, - { - "expr": "kube_deployment_status_replicas_updated{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}", - "intervalFactor": 2, - "legendFormat": "updated", - "refId": "D", - "step": 4 + "id": 3, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 3, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false }, - { - "expr": 
"kube_deployment_spec_replicas{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}", - "intervalFactor": 2, - "legendFormat": "desired", - "refId": "E", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Replicas", - "tooltip": { - "msResolution": true, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "transparent": false, - "type": "graph", - "xaxis": { - "mode": "time", - "name": null, - "show": true, - "values": [] + "targets": [ + { + "expr": "kube_deployment_status_observed_generation{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 60 + } + ], + "thresholds": "", + "title": "Observed Generation", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" }, - "yaxes": [ - { - "format": "none", - "label": "", - "logBase": 1, - "max": null, - "min": null, - "show": true + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, + "id": 2, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 3, + "sparkline": { + 
"fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "New row", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [], - "templating": { - "list": [ - { - "allValue": ".*", - "current": {}, - "datasource": "${DS_PROMETHEUS}", - "hide": 0, - "includeAll": false, - "label": "Namespace", - "multi": false, - "name": "deployment_namespace", - "options": [], - "query": "label_values(kube_deployment_metadata_generation, namespace)", - "refresh": 1, - "regex": "", - "sort": 0, - "tagValuesQuery": null, - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": false + }, + "targets": [ + { + "expr": "kube_deployment_metadata_generation{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 60 + } + ], + "thresholds": "", + "title": "Metadata Generation", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "New row", + "titleSize": "h6" }, { - "allValue": null, - "current": {}, - "datasource": "${DS_PROMETHEUS}", - "hide": 0, - "includeAll": false, - "label": "Deployments", - "multi": false, - "name": "deployment_name", - "options": [], - "query": "label_values(kube_deployment_metadata_generation{namespace=\"$deployment_namespace\"}, deployment)", - "refresh": 1, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "deployment", - "type": "query", - "useTags": false + "collapse": false, + "height": "350px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 1, + 
"grid": {}, + "id": 1, + "legend": { + "avg": false, + "current": false, + "hideZero": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "kube_deployment_status_replicas{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}", + "intervalFactor": 2, + "legendFormat": "current replicas", + "refId": "A", + "step": 4 + }, + { + "expr": "kube_deployment_status_replicas_available{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}", + "intervalFactor": 2, + "legendFormat": "available", + "refId": "B", + "step": 4 + }, + { + "expr": "kube_deployment_status_replicas_unavailable{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}", + "intervalFactor": 2, + "legendFormat": "unavailable", + "refId": "C", + "step": 4 + }, + { + "expr": "kube_deployment_status_replicas_updated{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}", + "intervalFactor": 2, + "legendFormat": "updated", + "refId": "D", + "step": 4 + }, + { + "expr": "kube_deployment_spec_replicas{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}", + "intervalFactor": 2, + "legendFormat": "desired", + "refId": "E", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Replicas", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "transparent": false, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + 
"max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "New row", + "titleSize": "h6" } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] + "schemaVersion": 14, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "allValue": ".*", + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "hide": 0, + "includeAll": false, + "label": "Namespace", + "multi": false, + "name": "deployment_namespace", + "options": [], + "query": "label_values(kube_deployment_metadata_generation, namespace)", + "refresh": 1, + "regex": "", + "sort": 0, + "tagValuesQuery": null, + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "hide": 0, + "includeAll": false, + "label": "Deployments", + "multi": false, + "name": "deployment_name", + "options": [], + "query": "label_values(kube_deployment_metadata_generation{namespace=\"$deployment_namespace\"}, deployment)", + "refresh": 1, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "deployment", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Deployments", + "version": 1 }, - "timezone": "browser", - "title": "Deployments", - "version": 1 + "inputs": [ + { + "name": "DS_PROMETHEUS", + "pluginId": "prometheus", + "type": "datasource", + 
"value": "prometheus" + } + ], + "overwrite": true } diff --git a/kube-prometheus/grafana/kubernetes-pods-dashboard.json b/kube-prometheus/grafana/kubernetes-pods-dashboard.json index ecf2f499..eea9aa22 100644 --- a/kube-prometheus/grafana/kubernetes-pods-dashboard.json +++ b/kube-prometheus/grafana/kubernetes-pods-dashboard.json @@ -1,421 +1,432 @@ { - "__inputs": [ - { - "name": "DS_PROMETHEUS", - "label": "prometheus", - "description": "", - "type": "datasource", - "pluginId": "prometheus", - "pluginName": "Prometheus" - } - ], - "__requires": [ - { - "type": "grafana", - "id": "grafana", - "name": "Grafana", - "version": "4.1.1" - }, - { - "type": "panel", - "id": "graph", - "name": "Graph", - "version": "" + "dashboard": { + "__inputs": [ + { + "name": "DS_PROMETHEUS", + "label": "prometheus", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "4.1.1" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + } + ], + "annotations": { + "list": [] }, - { - "type": "datasource", - "id": "prometheus", - "name": "Prometheus", - "version": "1.0.0" - } - ], - "annotations": { - "list": [] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "id": null, - "links": [], - "refresh": "30s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": {}, - "bars": false, - "datasource": "${DS_PROMETHEUS}", - "editable": true, - "error": false, - "fill": 1, - "grid": {}, - "id": 1, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": 
"connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "span": 12, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum by(container_name) (container_memory_usage_bytes{pod_name=\"$pod\", container_name=~\"$container\", container_name!=\"POD\"})", - "interval": "10s", - "intervalFactor": 1, - "legendFormat": "Current: {{ container_name }}", - "metric": "container_memory_usage_bytes", - "refId": "A", - "step": 10 + "editable": true, + "gnetId": null, + "graphTooltip": 1, + "hideControls": false, + "id": null, + "links": [], + "refresh": "30s", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 1, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": true }, - { - "expr": "kube_pod_container_requested_memory_bytes{pod=\"$pod\", container=~\"$container\"}", - "interval": "10s", - "intervalFactor": 2, - "legendFormat": "Requested: {{ container }}", - "metric": "kube_pod_container_requested_memory_bytes", - "refId": "B", - "step": 20 - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Memory Usage", - "tooltip": { - "msResolution": true, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 12, + "stack": false, + "steppedLine": false, + 
"targets": [ + { + "expr": "sum by(container_name) (container_memory_usage_bytes{pod_name=\"$pod\", container_name=~\"$container\", container_name!=\"POD\"})", + "interval": "10s", + "intervalFactor": 1, + "legendFormat": "Current: {{ container_name }}", + "metric": "container_memory_usage_bytes", + "refId": "A", + "step": 10 + }, + { + "expr": "kube_pod_container_requested_memory_bytes{pod=\"$pod\", container=~\"$container\"}", + "interval": "10s", + "intervalFactor": 2, + "legendFormat": "Requested: {{ container }}", + "metric": "kube_pod_container_requested_memory_bytes", + "refId": "B", + "step": 20 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Memory Usage", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "cumulative" }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Row", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": {}, - "bars": false, - "datasource": "${DS_PROMETHEUS}", - "editable": true, - "error": false, - "fill": 1, - "grid": {}, - "id": 2, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "span": 12, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum by (container_name)( rate(container_cpu_usage_seconds_total{image!=\"\",container_name!=\"POD\",pod_name=\"$pod\"}[1m] ) )", - "intervalFactor": 2, - "legendFormat": "{{ container_name }}", - "refId": "A", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": 
null, - "timeShift": null, - "title": "CPU Usage", - "tooltip": { - "msResolution": true, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "New row", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": {}, - "bars": false, - "datasource": "${DS_PROMETHEUS}", - "editable": true, - "error": false, - "fill": 1, - "grid": {}, - "id": 3, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "span": 12, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sort_desc(sum by (pod_name) (rate (container_network_receive_bytes_total{pod_name=\"$pod\"}[1m]) ))", - "intervalFactor": 2, - "legendFormat": "{{ pod_name }}", - "refId": "A", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Network I/O", - "tooltip": { - "msResolution": true, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - 
"show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "New row", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [], - "templating": { - "list": [ - { - "allValue": ".*", - "current": {}, - "datasource": "${DS_PROMETHEUS}", - "hide": 0, - "includeAll": true, - "label": "Namespace", - "multi": false, - "name": "namespace", - "options": [], - "query": "label_values(kube_pod_info, namespace)", - "refresh": 1, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": false + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Row", + "titleSize": "h6" }, { - "allValue": null, - "current": {}, - "datasource": "${DS_PROMETHEUS}", - "hide": 0, - "includeAll": false, - "label": "Pod", - "multi": false, - "name": "pod", - "options": [], - "query": "label_values(kube_pod_info{namespace=~\"$namespace\"}, pod)", - "refresh": 1, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": false + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 2, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + 
"percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (container_name)( rate(container_cpu_usage_seconds_total{image!=\"\",container_name!=\"POD\",pod_name=\"$pod\"}[1m] ) )", + "intervalFactor": 2, + "legendFormat": "{{ container_name }}", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "CPU Usage", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "New row", + "titleSize": "h6" }, { - "allValue": ".*", - "current": {}, - "datasource": "${DS_PROMETHEUS}", - "hide": 0, - "includeAll": true, - "label": "Container", - "multi": false, - "name": "container", - "options": [], - "query": "label_values(kube_pod_container_info{namespace=\"$namespace\", pod=\"$pod\"}, container)", - "refresh": 1, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": false + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 3, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + 
"percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum by (pod_name) (rate (container_network_receive_bytes_total{pod_name=\"$pod\"}[1m]) ))", + "intervalFactor": 2, + "legendFormat": "{{ pod_name }}", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Network I/O", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "New row", + "titleSize": "h6" } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] + "schemaVersion": 14, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "allValue": ".*", + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "hide": 0, + "includeAll": true, + "label": "Namespace", + "multi": false, + "name": "namespace", + "options": [], + "query": "label_values(kube_pod_info, namespace)", + "refresh": 1, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "hide": 0, + "includeAll": false, + "label": "Pod", + "multi": false, + "name": "pod", + 
"options": [], + "query": "label_values(kube_pod_info{namespace=~\"$namespace\"}, pod)", + "refresh": 1, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".*", + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "hide": 0, + "includeAll": true, + "label": "Container", + "multi": false, + "name": "container", + "options": [], + "query": "label_values(kube_pod_container_info{namespace=\"$namespace\", pod=\"$pod\"}, container)", + "refresh": 1, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Pods", + "version": 2 }, - "timezone": "browser", - "title": "Pods", - "version": 2 + "inputs": [ + { + "name": "DS_PROMETHEUS", + "pluginId": "prometheus", + "type": "datasource", + "value": "prometheus" + } + ], + "overwrite": true } diff --git a/kube-prometheus/grafana/node-dashboard.json b/kube-prometheus/grafana/node-dashboard.json index cabff34e..c1a507a0 100644 --- a/kube-prometheus/grafana/node-dashboard.json +++ b/kube-prometheus/grafana/node-dashboard.json @@ -1,838 +1,849 @@ { - "__inputs": [ - { - "name": "DS_PROMETHEUS", - "label": "prometheus", - "description": "", - "type": "datasource", - "pluginId": "prometheus", - "pluginName": "Prometheus" - } - ], - "__requires": [ - { - "type": "grafana", - "id": "grafana", - "name": "Grafana", - "version": "4.1.1" - }, - { - "type": "panel", - "id": "graph", - "name": "Graph", - "version": "" - }, - { - "type": "datasource", - "id": "prometheus", - "name": "Prometheus", - "version": "1.0.0" + "dashboard": { + "__inputs": [ + 
{ + "name": "DS_PROMETHEUS", + "label": "prometheus", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "4.1.1" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + }, + { + "type": "panel", + "id": "singlestat", + "name": "Singlestat", + "version": "" + } + ], + "annotations": { + "list": [] }, - { - "type": "panel", - "id": "singlestat", - "name": "Singlestat", - "version": "" - } - ], - "annotations": { - "list": [] - }, - "description": "Dashboard to get an overview of one server", - "editable": true, - "gnetId": 22, - "graphTooltip": 0, - "hideControls": false, - "id": null, - "links": [], - "refresh": "30s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "alerting": {}, - "aliasColors": {}, - "bars": false, - "datasource": "${DS_PROMETHEUS}", - "editable": true, - "error": false, - "fill": 1, - "grid": {}, - "id": 3, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "100 - (avg by (cpu) (irate(node_cpu{mode=\"idle\", instance=~\"$server\"}[5m])) * 100)", - "hide": false, - "intervalFactor": 10, - "legendFormat": "{{cpu}}", - "refId": "A", - "step": 50 - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Idle cpu", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "mode": 
"time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "percent", - "label": "cpu usage", - "logBase": 1, - "max": 100, - "min": 0, - "show": true + "description": "Dashboard to get an overview of one server", + "editable": true, + "gnetId": 22, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [], + "refresh": "30s", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "alerting": {}, + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "alerting": {}, - "aliasColors": {}, - "bars": false, - "datasource": "${DS_PROMETHEUS}", - "editable": true, - "error": false, - "fill": 1, - "grid": {}, - "id": 9, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "node_load1{instance=~\"$server\"}", - "intervalFactor": 4, - "legendFormat": "load 1m", - "refId": "A", - "step": 20, - "target": "" + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "100 - (avg by (cpu) (irate(node_cpu{mode=\"idle\", instance=~\"$server\"}[5m])) * 100)", + "hide": false, + "intervalFactor": 10, + 
"legendFormat": "{{cpu}}", + "refId": "A", + "step": 50 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Idle cpu", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" }, - { - "expr": "node_load5{instance=~\"$server\"}", - "intervalFactor": 4, - "legendFormat": "load 5m", - "refId": "B", - "step": 20, - "target": "" + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] }, - { - "expr": "node_load15{instance=~\"$server\"}", - "intervalFactor": 4, - "legendFormat": "load 15m", - "refId": "C", - "step": 20, - "target": "" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "System load", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "name": null, - "show": true, - "values": [] + "yaxes": [ + { + "format": "percent", + "label": "cpu usage", + "logBase": 1, + "max": 100, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] }, - "yaxes": [ - { - "format": "percentunit", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + { + "alerting": {}, + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 9, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "New row", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "alerting": {}, - "aliasColors": {}, - "bars": false, - 
"datasource": "${DS_PROMETHEUS}", - "editable": true, - "error": false, - "fill": 1, - "grid": {}, - "id": 4, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "node_memory_SwapFree{instance=\"172.17.0.1:9100\",job=\"prometheus\"}", - "yaxis": 2 - } - ], - "span": 9, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "node_memory_MemTotal{instance=~\"$server\"} - node_memory_MemFree{instance=~\"$server\"}", - "intervalFactor": 2, - "legendFormat": "free memory", - "metric": "memo", - "refId": "A", - "step": 4, - "target": "" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Free memory", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_load1{instance=~\"$server\"}", + "intervalFactor": 4, + "legendFormat": "load 1m", + "refId": "A", + "step": 20, + "target": "" + }, + { + "expr": "node_load5{instance=~\"$server\"}", + "intervalFactor": 4, + "legendFormat": "load 5m", + "refId": "B", + "step": 20, + "target": "" + }, + { + "expr": "node_load15{instance=~\"$server\"}", + "intervalFactor": 4, + "legendFormat": "load 15m", + "refId": "C", + "step": 20, + "target": "" + } + ], + 
"thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "System load", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": "${DS_PROMETHEUS}", - "editable": true, - "error": false, - "format": "percent", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": true, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "id": 5, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 3, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "targets": [ - { - "expr": "(node_memory_MemFree{instance=~\"$server\"} / node_memory_MemTotal{instance=~\"$server\"}) * 100", - "intervalFactor": 2, - "refId": "A", - "step": 60, - "target": "" - } - ], - "thresholds": "10, 20", - "title": "Free memory", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "avg" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "New row", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - 
{ - "alerting": {}, - "aliasColors": {}, - "bars": false, - "datasource": "${DS_PROMETHEUS}", - "editable": true, - "error": false, - "fill": 1, - "grid": {}, - "id": 6, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "New row", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "alerting": {}, + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "node_memory_SwapFree{instance=\"172.17.0.1:9100\",job=\"prometheus\"}", + "yaxis": 2 + } + ], + "span": 9, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_memory_MemTotal{instance=~\"$server\"} - node_memory_MemFree{instance=~\"$server\"}", + "intervalFactor": 2, + "legendFormat": "free memory", + "metric": "memo", + "refId": "A", + "step": 4, + "target": "" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Free memory", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + 
"label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "read", - "yaxis": 1 + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true }, - { - "alias": "{instance=\"172.17.0.1:9100\"}", - "yaxis": 2 + "id": 5, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 3, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false }, - { - "alias": "io time", - "yaxis": 2 - } - ], - "span": 9, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum by (instance) (irate(node_disk_bytes_read{instance=~\"$server\"}[5m]))", - "hide": false, - "intervalFactor": 4, - "legendFormat": "read", - "refId": "A", - "step": 8, - "target": "" + "targets": [ + { + "expr": "(node_memory_MemFree{instance=~\"$server\"} / node_memory_MemTotal{instance=~\"$server\"}) * 100", + "intervalFactor": 2, + "refId": "A", + "step": 60, + "target": "" + } + ], + 
"thresholds": "10, 20", + "title": "Free memory", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "New row", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "alerting": {}, + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false }, - { - "expr": "sum by (instance) (irate(node_disk_bytes_written{instance=~\"$server\"}[5m]))", - "intervalFactor": 4, - "legendFormat": "written", - "refId": "B", - "step": 8 + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "read", + "yaxis": 1 + }, + { + "alias": "{instance=\"172.17.0.1:9100\"}", + "yaxis": 2 + }, + { + "alias": "io time", + "yaxis": 2 + } + ], + "span": 9, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (instance) (irate(node_disk_bytes_read{instance=~\"$server\"}[5m]))", + "hide": false, + "intervalFactor": 4, + "legendFormat": "read", + "refId": "A", + "step": 8, + "target": "" + }, + { + "expr": "sum by (instance) (irate(node_disk_bytes_written{instance=~\"$server\"}[5m]))", + "intervalFactor": 4, + "legendFormat": "written", + "refId": "B", + "step": 8 + }, + { + "expr": "sum by (instance) (irate(node_disk_io_time_ms{instance=~\"$server\"}[5m]))", + "intervalFactor": 4, + "legendFormat": "io time", + "refId": "C", + "step": 8 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Disk usage", + "tooltip": { + "msResolution": false, + 
"shared": true, + "sort": 0, + "value_type": "cumulative" }, - { - "expr": "sum by (instance) (irate(node_disk_io_time_ms{instance=~\"$server\"}[5m]))", - "intervalFactor": 4, - "legendFormat": "io time", - "refId": "C", - "step": 8 - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Disk usage", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] }, - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": "${DS_PROMETHEUS}", - "editable": true, - "error": false, - "format": "percentunit", - "gauge": { - "maxValue": 1, - "minValue": 0, - "show": true, - "thresholdLabels": false, - "thresholdMarkers": true + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] }, - "id": 7, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "format": "percentunit", + "gauge": { + "maxValue": 1, + "minValue": 0, + "show": true, + "thresholdLabels": 
false, + "thresholdMarkers": true }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 3, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "targets": [ - { - "expr": "sum(node_filesystem_free{device!=\"rootfs\",instance=~\"$server\"}) / sum(node_filesystem_size{device!=\"rootfs\",instance=~\"$server\"})", - "intervalFactor": 2, - "refId": "A", - "step": 60, - "target": "" - } - ], - "thresholds": "0.10, 0.25", - "title": "Free disk space", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "New row", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "alerting": {}, - "aliasColors": {}, - "bars": false, - "datasource": "${DS_PROMETHEUS}", - "editable": true, - "error": false, - "fill": 1, - "grid": {}, - "id": 8, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "transmitted ", - "yaxis": 2 - } - ], - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "irate(node_network_receive_bytes{instance=~\"$server\",device!~\"lo\"}[5m])", - "hide": false, - "intervalFactor": 2, - "legendFormat": "{{device}}", - "refId": "A", - "step": 10, - "target": "" - } - ], - 
"thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Network received", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "name": null, - "show": true, - "values": [] + "id": 7, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 3, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "expr": "sum(node_filesystem_free{device!=\"rootfs\",instance=~\"$server\"}) / sum(node_filesystem_size{device!=\"rootfs\",instance=~\"$server\"})", + "intervalFactor": 2, + "refId": "A", + "step": 60, + "target": "" + } + ], + "thresholds": "0.10, 0.25", + "title": "Free disk space", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "New row", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "alerting": {}, + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + 
"renderer": "flot", + "seriesOverrides": [ + { + "alias": "transmitted ", + "yaxis": 2 + } + ], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(node_network_receive_bytes{instance=~\"$server\",device!~\"lo\"}[5m])", + "hide": false, + "intervalFactor": 2, + "legendFormat": "{{device}}", + "refId": "A", + "step": 10, + "target": "" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Network received", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + { + "alerting": {}, + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 10, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "transmitted ", + "yaxis": 2 + } + ], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(node_network_transmit_bytes{instance=~\"$server\",device!~\"lo\"}[5m])", + "hide": false, + "intervalFactor": 2, + "legendFormat": "{{device}}", + "refId": "B", + "step": 10, + "target": "" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Network transmitted", + "tooltip": { + 
"msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" }, - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "New row", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "prometheus" + ], + "templating": { + "list": [ { - "alerting": {}, - "aliasColors": {}, - "bars": false, + "allValue": null, + "current": {}, "datasource": "${DS_PROMETHEUS}", - "editable": true, - "error": false, - "fill": 1, - "grid": {}, - "id": 10, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "transmitted ", - "yaxis": 2 - } - ], - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "irate(node_network_transmit_bytes{instance=~\"$server\",device!~\"lo\"}[5m])", - "hide": false, - "intervalFactor": 2, - "legendFormat": "{{device}}", - "refId": "B", - "step": 10, - "target": "" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Network transmitted", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": 
"bytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "server", + "options": [], + "query": "label_values(node_boot_time, instance)", + "refresh": 1, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "New row", - "titleSize": "h6" + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Nodes", + "version": 1 + }, + "inputs": [ + { + "name": "DS_PROMETHEUS", + "pluginId": "prometheus", + "type": "datasource", + "value": "prometheus" } ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "prometheus" - ], - "templating": { - "list": [ - { - "allValue": null, - "current": {}, - "datasource": "${DS_PROMETHEUS}", - "hide": 0, - "includeAll": false, - "label": null, - "multi": false, - "name": "server", - "options": [], - "query": "label_values(node_boot_time, instance)", - "refresh": 1, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "browser", - "title": "Nodes", - "version": 1 + "overwrite": true } diff --git 
a/kube-prometheus/grafana/prometheus-stats.json b/kube-prometheus/grafana/prometheus-stats.json index bd31d5b9..61911dc4 100644 --- a/kube-prometheus/grafana/prometheus-stats.json +++ b/kube-prometheus/grafana/prometheus-stats.json @@ -1,742 +1,753 @@ { - "__inputs": [ - { - "name": "DS_PROMETHEUS", - "label": "prometheus", - "description": "", - "type": "datasource", - "pluginId": "prometheus", - "pluginName": "Prometheus" - } - ], - "__requires": [ - { - "type": "grafana", - "id": "grafana", - "name": "Grafana", - "version": "4.1.1" - }, - { - "type": "panel", - "id": "graph", - "name": "Graph", - "version": "" - }, - { - "type": "datasource", - "id": "prometheus", - "name": "Prometheus", - "version": "1.0.0" - }, - { - "type": "panel", - "id": "singlestat", - "name": "Singlestat", - "version": "" - }, - { - "type": "panel", - "id": "text", - "name": "Text", - "version": "" - } - ], - "annotations": { - "list": [] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 0, - "hideControls": true, - "id": null, - "links": [ - { - "icon": "info", - "tags": [], - "targetBlank": true, - "title": "Grafana Docs", - "tooltip": "", - "type": "link", - "url": "http://www.grafana.org/docs" + "dashboard": { + "__inputs": [ + { + "name": "DS_PROMETHEUS", + "label": "prometheus", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "4.1.1" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + }, + { + "type": "panel", + "id": "singlestat", + "name": "Singlestat", + "version": "" + }, + { + "type": "panel", + "id": "text", + "name": "Text", + "version": "" + } + ], + "annotations": { + "list": [] }, - { - "icon": "info", - "tags": [], - "targetBlank": true, - "title": "Prometheus Docs", - "type": 
"link", - "url": "http://prometheus.io/docs/introduction/overview/" - } - ], - "refresh": "30s", - "revision": "1.0", - "rows": [ - { - "collapse": false, - "height": 178, - "panels": [ - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": "${DS_PROMETHEUS}", - "decimals": 1, - "editable": true, - "error": false, - "format": "s", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "id": 5, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": true, + "id": null, + "links": [ + { + "icon": "info", + "tags": [], + "targetBlank": true, + "title": "Grafana Docs", + "tooltip": "", + "type": "link", + "url": "http://www.grafana.org/docs" + }, + { + "icon": "info", + "tags": [], + "targetBlank": true, + "title": "Prometheus Docs", + "type": "link", + "url": "http://prometheus.io/docs/introduction/overview/" + } + ], + "refresh": "30s", + "revision": "1.0", + "rows": [ + { + "collapse": false, + "height": 178, + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_PROMETHEUS}", + "decimals": 1, + "editable": true, + "error": false, + "format": "s", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - 
} - ], - "span": 3, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "targets": [ - { - "expr": "(time() - process_start_time_seconds{job=\"prometheus-k8s\"})", - "intervalFactor": 2, - "refId": "A", - "step": 60 - } - ], - "thresholds": "", - "title": "Uptime", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": "${DS_PROMETHEUS}", - "editable": true, - "error": false, - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "id": 6, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 + "id": 5, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 3, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 3, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": 
false, - "lineColor": "rgb(31, 120, 193)", - "show": true - }, - "targets": [ - { - "expr": "prometheus_local_storage_memory_series", - "intervalFactor": 2, - "refId": "A", - "step": 60 - } - ], - "thresholds": "1,5", - "title": "Local Storage Memory Series", - "type": "singlestat", - "valueFontSize": "70%", - "valueMaps": [], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": true, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": "${DS_PROMETHEUS}", - "editable": true, - "error": false, - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true + "targets": [ + { + "expr": "(time() - process_start_time_seconds{job=\"prometheus-k8s\"})", + "intervalFactor": 2, + "refId": "A", + "step": 60 + } + ], + "thresholds": "", + "title": "Uptime", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" }, - "id": 7, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 3, - "sparkline": { - "fillColor": "rgba(31, 118, 
189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": true - }, - "targets": [ - { - "expr": "prometheus_local_storage_indexing_queue_length", - "intervalFactor": 2, - "refId": "A", - "step": 60 - } - ], - "thresholds": "500,4000", - "title": "Internal Storage Queue Length", - "type": "singlestat", - "valueFontSize": "70%", - "valueMaps": [ - { - "op": "=", - "text": "Empty", - "value": "0" - } - ], - "valueName": "current" - }, - { - "content": "\"Prometheus\nPrometheus\n\n

You're using Prometheus, an open-source systems monitoring and alerting toolkit originally built at SoundCloud. For more information, check out the Grafana and Prometheus projects.

", - "editable": true, - "error": false, - "id": 9, - "links": [], - "mode": "html", - "span": 3, - "style": {}, - "title": "", - "transparent": true, - "type": "text" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "New row", - "titleSize": "h6" - }, - { - "collapse": false, - "height": 227, - "panels": [ - { - "aliasColors": { - "prometheus": "#C15C17", - "{instance=\"localhost:9090\",job=\"prometheus\"}": "#C15C17" - }, - "bars": false, - "datasource": "${DS_PROMETHEUS}", - "editable": true, - "error": false, - "fill": 1, - "grid": {}, - "id": 3, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "span": 9, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "rate(prometheus_local_storage_ingested_samples_total[5m])", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{job}}", - "metric": "", - "refId": "A", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Samples ingested (rate-5m)", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "logBase": 1, - "max": null, - "min": null, + "id": 6, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": 
"null" + } + ], + "span": 3, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", "show": true }, - { - "format": "short", - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "content": "#### Samples Ingested\nThis graph displays the count of samples ingested by the Prometheus server, as measured over the last 5 minutes, per time series in the range vector. When troubleshooting an issue on IRC or Github, this is often the first stat requested by the Prometheus team. ", - "editable": true, - "error": false, - "id": 8, - "links": [], - "mode": "markdown", - "span": 2.995914043583536, - "style": {}, - "title": "", - "transparent": true, - "type": "text" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "New row", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { - "prometheus": "#F9BA8F", - "{instance=\"localhost:9090\",interval=\"5s\",job=\"prometheus\"}": "#F9BA8F" + "targets": [ + { + "expr": "prometheus_local_storage_memory_series", + "intervalFactor": 2, + "refId": "A", + "step": 60 + } + ], + "thresholds": "1,5", + "title": "Local Storage Memory Series", + "type": "singlestat", + "valueFontSize": "70%", + "valueMaps": [], + "valueName": "current" }, - "bars": false, - "datasource": "${DS_PROMETHEUS}", - "editable": true, - "error": false, - "fill": 1, - "grid": {}, - "id": 2, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "span": 5, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "rate(prometheus_target_interval_length_seconds_count[5m])", - 
"intervalFactor": 2, - "legendFormat": "{{job}}", - "refId": "A", - "step": 10 - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Target Scrapes (last 5m)", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "logBase": 1, - "max": null, - "min": null, - "show": true + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": true, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true }, - { - "format": "short", - "logBase": 1, - "max": null, - "min": null, + "id": 7, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 3, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", "show": true - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "datasource": "${DS_PROMETHEUS}", - "editable": true, - "error": false, - "fill": 1, - "grid": {}, - "id": 14, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - 
"seriesOverrides": [], - "span": 4, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "prometheus_target_interval_length_seconds{quantile!=\"0.01\", quantile!=\"0.05\"}", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{quantile}} ({{interval}})", - "metric": "", - "refId": "A", - "step": 10 - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Scrape Duration", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "name": null, - "show": true, - "values": [] + }, + "targets": [ + { + "expr": "prometheus_local_storage_indexing_queue_length", + "intervalFactor": 2, + "refId": "A", + "step": 60 + } + ], + "thresholds": "500,4000", + "title": "Internal Storage Queue Length", + "type": "singlestat", + "valueFontSize": "70%", + "valueMaps": [ + { + "op": "=", + "text": "Empty", + "value": "0" + } + ], + "valueName": "current" }, - "yaxes": [ - { - "format": "short", - "logBase": 1, - "max": null, - "min": null, - "show": true + { + "content": "\"Prometheus\nPrometheus\n\n

You're using Prometheus, an open-source systems monitoring and alerting toolkit originally built at SoundCloud. For more information, check out the Grafana and Prometheus projects.

", + "editable": true, + "error": false, + "id": 9, + "links": [], + "mode": "html", + "span": 3, + "style": {}, + "title": "", + "transparent": true, + "type": "text" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "New row", + "titleSize": "h6" + }, + { + "collapse": false, + "height": 227, + "panels": [ + { + "aliasColors": { + "prometheus": "#C15C17", + "{instance=\"localhost:9090\",job=\"prometheus\"}": "#C15C17" }, - { - "format": "short", - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "content": "#### Scrapes\nPrometheus scrapes metrics from instrumented jobs, either directly or via an intermediary push gateway for short-lived jobs. Target scrapes will show how frequently targets are scraped, as measured over the last 5 minutes, per time series in the range vector. Scrape Duration will show how long the scrapes are taking, with percentiles available as series. ", - "editable": true, - "error": false, - "id": 11, - "links": [], - "mode": "markdown", - "span": 3, - "style": {}, - "title": "", - "transparent": true, - "type": "text" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "New row", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": {}, - "bars": false, - "datasource": "${DS_PROMETHEUS}", - "decimals": null, - "editable": true, - "error": false, - "fill": 1, - "grid": {}, - "id": 12, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "hideEmpty": true, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false + "bars": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + 
"linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 9, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(prometheus_local_storage_ingested_samples_total[5m])", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{job}}", + "metric": "", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Samples ingested (rate-5m)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "span": 9, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "prometheus_evaluator_duration_seconds{quantile!=\"0.01\", quantile!=\"0.05\"}", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{quantile}}", - "refId": "A", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Rule Eval Duration", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "cumulative" + { + "content": "#### Samples Ingested\nThis graph displays the count of samples ingested by the Prometheus server, as measured over the last 5 minutes, per time series in the range vector. When troubleshooting an issue on IRC or Github, this is often the first stat requested by the Prometheus team. 
", + "editable": true, + "error": false, + "id": 8, + "links": [], + "mode": "markdown", + "span": 2.995914043583536, + "style": {}, + "title": "", + "transparent": true, + "type": "text" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "New row", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + "prometheus": "#F9BA8F", + "{instance=\"localhost:9090\",interval=\"5s\",job=\"prometheus\"}": "#F9BA8F" + }, + "bars": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 5, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(prometheus_target_interval_length_seconds_count[5m])", + "intervalFactor": 2, + "legendFormat": "{{job}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Target Scrapes (last 5m)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] }, - "type": "graph", - "xaxis": { - "mode": "time", - "name": null, - "show": true, - "values": [] + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 14, + "legend": { + "avg": false, + 
"current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "prometheus_target_interval_length_seconds{quantile!=\"0.01\", quantile!=\"0.05\"}", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{quantile}} ({{interval}})", + "metric": "", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Scrape Duration", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] }, - "yaxes": [ - { - "format": "percentunit", - "label": "", - "logBase": 1, - "max": null, - "min": null, - "show": true + { + "content": "#### Scrapes\nPrometheus scrapes metrics from instrumented jobs, either directly or via an intermediary push gateway for short-lived jobs. Target scrapes will show how frequently targets are scraped, as measured over the last 5 minutes, per time series in the range vector. Scrape Duration will show how long the scrapes are taking, with percentiles available as series. 
", + "editable": true, + "error": false, + "id": 11, + "links": [], + "mode": "markdown", + "span": 3, + "style": {}, + "title": "", + "transparent": true, + "type": "text" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "New row", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS}", + "decimals": null, + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 12, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false }, - { - "format": "short", - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "content": "#### Rule Evaluation Duration\nThis graph panel plots the duration for all evaluations to execute. The 50th percentile, 90th percentile and 99th percentile are shown as three separate series to help identify outliers that may be skewing the data.", - "editable": true, - "error": false, - "id": 15, - "links": [], - "mode": "markdown", - "span": 3, - "style": {}, - "title": "", - "transparent": true, - "type": "text" - } + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 9, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "prometheus_evaluator_duration_seconds{quantile!=\"0.01\", quantile!=\"0.05\"}", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{quantile}}", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Rule Eval Duration", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": 
true, + "values": [] + }, + "yaxes": [ + { + "format": "percentunit", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "content": "#### Rule Evaluation Duration\nThis graph panel plots the duration for all evaluations to execute. The 50th percentile, 90th percentile and 99th percentile are shown as three separate series to help identify outliers that may be skewing the data.", + "editable": true, + "error": false, + "id": 15, + "links": [], + "mode": "markdown", + "span": 3, + "style": {}, + "title": "", + "transparent": true, + "type": "text" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "New row", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "prometheus" + ], + "templating": { + "list": [] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "now": true, + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "New row", - "titleSize": "h6" + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Prometheus Stats", + "version": 1 + }, + "inputs": [ + { + "name": "DS_PROMETHEUS", + "pluginId": "prometheus", + "type": "datasource", + "value": "prometheus" } ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "prometheus" - ], - "templating": { - "list": [] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "now": true, - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - 
"timezone": "browser", - "title": "Prometheus Stats", - "version": 1 + "overwrite": true } From 32f59165ec0be854c73f21aa90dc1565775ed01f Mon Sep 17 00:00:00 2001 From: Michael Goodness Date: Mon, 30 Jan 2017 16:14:12 -0600 Subject: [PATCH 09/41] Prep for TPRs working --- Makefile | 12 +++---- kube-prometheus/Chart.yaml | 2 +- kube-prometheus/templates/get-tprs-job.yaml | 33 +++++++++++++++++++ ...nfigmap.yaml => prometheus-configmap.yaml} | 0 ...s-ingress.yaml => prometheus-ingress.yaml} | 0 ...s-k8s-rules.yaml => prometheus-rules.yaml} | 0 ...s-service.yaml => prometheus-service.yaml} | 0 7 files changed, 38 insertions(+), 9 deletions(-) create mode 100644 kube-prometheus/templates/get-tprs-job.yaml rename kube-prometheus/templates/{prometheus-k8s-configmap.yaml => prometheus-configmap.yaml} (100%) rename kube-prometheus/templates/{prometheus-k8s-ingress.yaml => prometheus-ingress.yaml} (100%) rename kube-prometheus/templates/{prometheus-k8s-rules.yaml => prometheus-rules.yaml} (100%) rename kube-prometheus/templates/{prometheus-k8s-service.yaml => prometheus-service.yaml} (100%) diff --git a/Makefile b/Makefile index 6e7b51bb..fd9ab7a2 100644 --- a/Makefile +++ b/Makefile @@ -1,14 +1,10 @@ -kube-prometheus: prometheus-operator kube-prometheus-*.tgz - -prometheus-operator: prometheus-operator-*.tgz - -kube-prometheus-*.tgz: - cp prometheus-operator-*.tgz kube-prometheus/charts +kube-prometheus: kube-prometheus-*.tgz + helm lint kube-prometheus helm package kube-prometheus -prometheus-operator-*.tgz: +prometheus-operator: prometheus-operator-*.tgz + helm lint prometheus-operator helm package prometheus-operator clean: - rm -f kube-prometheus/charts/* rm -f *.tgz diff --git a/kube-prometheus/Chart.yaml b/kube-prometheus/Chart.yaml index 2a5d1179..46dd667b 100644 --- a/kube-prometheus/Chart.yaml +++ b/kube-prometheus/Chart.yaml @@ -6,4 +6,4 @@ maintainers: - name: Michael Goodness email: mgoodness@gmail.com name: kube-prometheus -version: 0.1.1 +version: 0.2.0 diff 
--git a/kube-prometheus/templates/get-tprs-job.yaml b/kube-prometheus/templates/get-tprs-job.yaml new file mode 100644 index 00000000..eec5aa98 --- /dev/null +++ b/kube-prometheus/templates/get-tprs-job.yaml @@ -0,0 +1,33 @@ +apiVersion: batch/v1 +kind: Job +metadata: + annotations: + helm.sh/hook: pre-install + labels: + app: {{ template "name" . }} + chart: {{ .Chart.Name }}-{{ .Chart.Version }} + heritage: {{ .Release.Service }} + release: {{ .Release.Name }} + name: {{ template "fullname" . }}-get-tprs +spec: + template: + metadata: + labels: + app: {{ template "name" . }} + release: {{ .Release.Name }} + name: {{ template "fullname" . }}-get-tprs + spec: + containers: + - name: get-alertmanager-tpr + image: "{{ .Values.global.hyperkube.repository }}:{{ .Values.global.hyperkube.tag }}" + imagePullPolicy: "{{ .Values.global.hyperkube.pullPolicy }}" + command: ["./kubectl","-n","{{ .Release.Namespace }}","get","alertmanager"] + - name: get-prometheus-tpr + image: "{{ .Values.global.hyperkube.repository }}:{{ .Values.global.hyperkube.tag }}" + imagePullPolicy: "{{ .Values.global.hyperkube.pullPolicy }}" + command: ["./kubectl","-n","{{ .Release.Namespace }}","get","prometheus"] + - name: get-servicemonitor-tpr + image: "{{ .Values.global.hyperkube.repository }}:{{ .Values.global.hyperkube.tag }}" + imagePullPolicy: "{{ .Values.global.hyperkube.pullPolicy }}" + command: ["./kubectl","-n","{{ .Release.Namespace }}","get","servicemonitor"] + restartPolicy: OnFailure diff --git a/kube-prometheus/templates/prometheus-k8s-configmap.yaml b/kube-prometheus/templates/prometheus-configmap.yaml similarity index 100% rename from kube-prometheus/templates/prometheus-k8s-configmap.yaml rename to kube-prometheus/templates/prometheus-configmap.yaml diff --git a/kube-prometheus/templates/prometheus-k8s-ingress.yaml b/kube-prometheus/templates/prometheus-ingress.yaml similarity index 100% rename from kube-prometheus/templates/prometheus-k8s-ingress.yaml rename to 
kube-prometheus/templates/prometheus-ingress.yaml diff --git a/kube-prometheus/templates/prometheus-k8s-rules.yaml b/kube-prometheus/templates/prometheus-rules.yaml similarity index 100% rename from kube-prometheus/templates/prometheus-k8s-rules.yaml rename to kube-prometheus/templates/prometheus-rules.yaml diff --git a/kube-prometheus/templates/prometheus-k8s-service.yaml b/kube-prometheus/templates/prometheus-service.yaml similarity index 100% rename from kube-prometheus/templates/prometheus-k8s-service.yaml rename to kube-prometheus/templates/prometheus-service.yaml From 3a208bb006893b73b74fdba878e232406139f1a4 Mon Sep 17 00:00:00 2001 From: Michael Goodness Date: Mon, 30 Jan 2017 17:40:01 -0600 Subject: [PATCH 10/41] Separate TPR hooks --- prometheus-operator/Chart.yaml | 2 +- .../delete-alertmanager-tpr-job.yaml | 25 +++++++++++++++++++ .../templates/delete-prometheus-tpr-job.yaml | 25 +++++++++++++++++++ ...aml => delete-servicemonitor-tpr-job.yaml} | 10 +------- .../templates/get-alertmanager-tpr-job.yaml | 25 +++++++++++++++++++ .../templates/get-prometheus-tpr-job.yaml | 25 +++++++++++++++++++ ...b.yaml => get-servicemonitor-tpr-job.yaml} | 10 +------- 7 files changed, 103 insertions(+), 19 deletions(-) create mode 100644 prometheus-operator/templates/delete-alertmanager-tpr-job.yaml create mode 100644 prometheus-operator/templates/delete-prometheus-tpr-job.yaml rename prometheus-operator/templates/{delete-tprs-job.yaml => delete-servicemonitor-tpr-job.yaml} (53%) create mode 100644 prometheus-operator/templates/get-alertmanager-tpr-job.yaml create mode 100644 prometheus-operator/templates/get-prometheus-tpr-job.yaml rename prometheus-operator/templates/{get-tprs-job.yaml => get-servicemonitor-tpr-job.yaml} (55%) diff --git a/prometheus-operator/Chart.yaml b/prometheus-operator/Chart.yaml index ac020180..4fa7696d 100644 --- a/prometheus-operator/Chart.yaml +++ b/prometheus-operator/Chart.yaml @@ -6,4 +6,4 @@ maintainers: - name: Michael Goodness email: 
mgoodness@gmail.com name: prometheus-operator -version: 0.1.2 +version: 0.1.3 diff --git a/prometheus-operator/templates/delete-alertmanager-tpr-job.yaml b/prometheus-operator/templates/delete-alertmanager-tpr-job.yaml new file mode 100644 index 00000000..43a1ae3a --- /dev/null +++ b/prometheus-operator/templates/delete-alertmanager-tpr-job.yaml @@ -0,0 +1,25 @@ +apiVersion: batch/v1 +kind: Job +metadata: + annotations: + helm.sh/hook: post-delete + labels: + app: {{ template "name" . }} + chart: {{ .Chart.Name }}-{{ .Chart.Version }} + heritage: {{ .Release.Service }} + release: {{ .Release.Name }} + name: {{ template "fullname" . }}-delete-tprs +spec: + template: + metadata: + labels: + app: {{ template "name" . }} + release: {{ .Release.Name }} + name: {{ template "fullname" . }}-delete-tprs + spec: + containers: + - name: hyperkube + image: "{{ .Values.global.hyperkube.repository }}:{{ .Values.global.hyperkube.tag }}" + imagePullPolicy: "{{ .Values.global.hyperkube.pullPolicy }}" + command: ["./kubectl","-n","{{ .Release.Namespace }}","delete","thirdpartyresource/alertmanager.monitoring.coreos.com"] + restartPolicy: Never diff --git a/prometheus-operator/templates/delete-prometheus-tpr-job.yaml b/prometheus-operator/templates/delete-prometheus-tpr-job.yaml new file mode 100644 index 00000000..bd7df679 --- /dev/null +++ b/prometheus-operator/templates/delete-prometheus-tpr-job.yaml @@ -0,0 +1,25 @@ +apiVersion: batch/v1 +kind: Job +metadata: + annotations: + helm.sh/hook: post-delete + labels: + app: {{ template "name" . }} + chart: {{ .Chart.Name }}-{{ .Chart.Version }} + heritage: {{ .Release.Service }} + release: {{ .Release.Name }} + name: {{ template "fullname" . }}-delete-tprs +spec: + template: + metadata: + labels: + app: {{ template "name" . }} + release: {{ .Release.Name }} + name: {{ template "fullname" . 
}}-delete-tprs + spec: + containers: + - name: hyperkube + image: "{{ .Values.global.hyperkube.repository }}:{{ .Values.global.hyperkube.tag }}" + imagePullPolicy: "{{ .Values.global.hyperkube.pullPolicy }}" + command: ["./kubectl","-n","{{ .Release.Namespace }}","delete","thirdpartyresource/prometheus.monitoring.coreos.com"] + restartPolicy: Never diff --git a/prometheus-operator/templates/delete-tprs-job.yaml b/prometheus-operator/templates/delete-servicemonitor-tpr-job.yaml similarity index 53% rename from prometheus-operator/templates/delete-tprs-job.yaml rename to prometheus-operator/templates/delete-servicemonitor-tpr-job.yaml index 2b401d35..99d4047e 100644 --- a/prometheus-operator/templates/delete-tprs-job.yaml +++ b/prometheus-operator/templates/delete-servicemonitor-tpr-job.yaml @@ -18,15 +18,7 @@ spec: name: {{ template "fullname" . }}-delete-tprs spec: containers: - - name: delete-alertmanager-tpr - image: "{{ .Values.global.hyperkube.repository }}:{{ .Values.global.hyperkube.tag }}" - imagePullPolicy: "{{ .Values.global.hyperkube.pullPolicy }}" - command: ["./kubectl","-n","{{ .Release.Namespace }}","delete","thirdpartyresource/alertmanager.monitoring.coreos.com"] - - name: delete-prometheus-tpr - image: "{{ .Values.global.hyperkube.repository }}:{{ .Values.global.hyperkube.tag }}" - imagePullPolicy: "{{ .Values.global.hyperkube.pullPolicy }}" - command: ["./kubectl","-n","{{ .Release.Namespace }}","delete","thirdpartyresource/prometheus.monitoring.coreos.com"] - - name: delete-servicemonitor-tpr + - name: hyperkube image: "{{ .Values.global.hyperkube.repository }}:{{ .Values.global.hyperkube.tag }}" imagePullPolicy: "{{ .Values.global.hyperkube.pullPolicy }}" command: ["./kubectl","-n","{{ .Release.Namespace }}","delete","thirdpartyresource/service-monitor.monitoring.coreos.com"] diff --git a/prometheus-operator/templates/get-alertmanager-tpr-job.yaml b/prometheus-operator/templates/get-alertmanager-tpr-job.yaml new file mode 100644 index 
00000000..c14f83e5 --- /dev/null +++ b/prometheus-operator/templates/get-alertmanager-tpr-job.yaml @@ -0,0 +1,25 @@ +apiVersion: batch/v1 +kind: Job +metadata: + annotations: + helm.sh/hook: post-install + labels: + app: {{ template "name" . }} + chart: {{ .Chart.Name }}-{{ .Chart.Version }} + heritage: {{ .Release.Service }} + release: {{ .Release.Name }} + name: {{ template "fullname" . }}-get-tprs +spec: + template: + metadata: + labels: + app: {{ template "name" . }} + release: {{ .Release.Name }} + name: {{ template "fullname" . }}-get-tprs + spec: + containers: + - name: hyperkube + image: "{{ .Values.global.hyperkube.repository }}:{{ .Values.global.hyperkube.tag }}" + imagePullPolicy: "{{ .Values.global.hyperkube.pullPolicy }}" + command: ["./kubectl","-n","{{ .Release.Namespace }}","get","alertmanager"] + restartPolicy: OnFailure diff --git a/prometheus-operator/templates/get-prometheus-tpr-job.yaml b/prometheus-operator/templates/get-prometheus-tpr-job.yaml new file mode 100644 index 00000000..1b7c3b79 --- /dev/null +++ b/prometheus-operator/templates/get-prometheus-tpr-job.yaml @@ -0,0 +1,25 @@ +apiVersion: batch/v1 +kind: Job +metadata: + annotations: + helm.sh/hook: post-install + labels: + app: {{ template "name" . }} + chart: {{ .Chart.Name }}-{{ .Chart.Version }} + heritage: {{ .Release.Service }} + release: {{ .Release.Name }} + name: {{ template "fullname" . }}-get-tprs +spec: + template: + metadata: + labels: + app: {{ template "name" . }} + release: {{ .Release.Name }} + name: {{ template "fullname" . 
}}-get-tprs + spec: + containers: + - name: hyperkube + image: "{{ .Values.global.hyperkube.repository }}:{{ .Values.global.hyperkube.tag }}" + imagePullPolicy: "{{ .Values.global.hyperkube.pullPolicy }}" + command: ["./kubectl","-n","{{ .Release.Namespace }}","get","prometheus"] + restartPolicy: OnFailure diff --git a/prometheus-operator/templates/get-tprs-job.yaml b/prometheus-operator/templates/get-servicemonitor-tpr-job.yaml similarity index 55% rename from prometheus-operator/templates/get-tprs-job.yaml rename to prometheus-operator/templates/get-servicemonitor-tpr-job.yaml index a5ada005..021608d4 100644 --- a/prometheus-operator/templates/get-tprs-job.yaml +++ b/prometheus-operator/templates/get-servicemonitor-tpr-job.yaml @@ -18,15 +18,7 @@ spec: name: {{ template "fullname" . }}-get-tprs spec: containers: - - name: get-alertmanager-tpr - image: "{{ .Values.global.hyperkube.repository }}:{{ .Values.global.hyperkube.tag }}" - imagePullPolicy: "{{ .Values.global.hyperkube.pullPolicy }}" - command: ["./kubectl","-n","{{ .Release.Namespace }}","get","alertmanager"] - - name: get-prometheus-tpr - image: "{{ .Values.global.hyperkube.repository }}:{{ .Values.global.hyperkube.tag }}" - imagePullPolicy: "{{ .Values.global.hyperkube.pullPolicy }}" - command: ["./kubectl","-n","{{ .Release.Namespace }}","get","prometheus"] - - name: get-servicemonitor-tpr + - name: hyperkube image: "{{ .Values.global.hyperkube.repository }}:{{ .Values.global.hyperkube.tag }}" imagePullPolicy: "{{ .Values.global.hyperkube.pullPolicy }}" command: ["./kubectl","-n","{{ .Release.Namespace }}","get","servicemonitor"] From fce4e796f11f347ab7c618534a958b76d03c94a8 Mon Sep 17 00:00:00 2001 From: Michael Goodness Date: Mon, 30 Jan 2017 19:02:16 -0600 Subject: [PATCH 11/41] Jobs fixes --- .../templates/delete-alertmanager-tpr-job.yaml | 4 ++-- prometheus-operator/templates/delete-prometheus-tpr-job.yaml | 4 ++-- .../templates/delete-servicemonitor-tpr-job.yaml | 4 ++-- 
prometheus-operator/templates/get-alertmanager-tpr-job.yaml | 4 ++-- prometheus-operator/templates/get-prometheus-tpr-job.yaml | 4 ++-- prometheus-operator/templates/get-servicemonitor-tpr-job.yaml | 4 ++-- 6 files changed, 12 insertions(+), 12 deletions(-) diff --git a/prometheus-operator/templates/delete-alertmanager-tpr-job.yaml b/prometheus-operator/templates/delete-alertmanager-tpr-job.yaml index 43a1ae3a..f71ca5be 100644 --- a/prometheus-operator/templates/delete-alertmanager-tpr-job.yaml +++ b/prometheus-operator/templates/delete-alertmanager-tpr-job.yaml @@ -8,14 +8,14 @@ metadata: chart: {{ .Chart.Name }}-{{ .Chart.Version }} heritage: {{ .Release.Service }} release: {{ .Release.Name }} - name: {{ template "fullname" . }}-delete-tprs + name: {{ template "fullname" . }}-delete-alertmanager-tpr spec: template: metadata: labels: app: {{ template "name" . }} release: {{ .Release.Name }} - name: {{ template "fullname" . }}-delete-tprs + name: {{ template "fullname" . }}-delete-alertmanager-tpr spec: containers: - name: hyperkube diff --git a/prometheus-operator/templates/delete-prometheus-tpr-job.yaml b/prometheus-operator/templates/delete-prometheus-tpr-job.yaml index bd7df679..1021901b 100644 --- a/prometheus-operator/templates/delete-prometheus-tpr-job.yaml +++ b/prometheus-operator/templates/delete-prometheus-tpr-job.yaml @@ -8,14 +8,14 @@ metadata: chart: {{ .Chart.Name }}-{{ .Chart.Version }} heritage: {{ .Release.Service }} release: {{ .Release.Name }} - name: {{ template "fullname" . }}-delete-tprs + name: {{ template "fullname" . }}-delete-prometheus-tpr spec: template: metadata: labels: app: {{ template "name" . }} release: {{ .Release.Name }} - name: {{ template "fullname" . }}-delete-tprs + name: {{ template "fullname" . 
}}-delete-prometheus-tpr spec: containers: - name: hyperkube diff --git a/prometheus-operator/templates/delete-servicemonitor-tpr-job.yaml b/prometheus-operator/templates/delete-servicemonitor-tpr-job.yaml index 99d4047e..4f5ead25 100644 --- a/prometheus-operator/templates/delete-servicemonitor-tpr-job.yaml +++ b/prometheus-operator/templates/delete-servicemonitor-tpr-job.yaml @@ -8,14 +8,14 @@ metadata: chart: {{ .Chart.Name }}-{{ .Chart.Version }} heritage: {{ .Release.Service }} release: {{ .Release.Name }} - name: {{ template "fullname" . }}-delete-tprs + name: {{ template "fullname" . }}-delete-servicemonitor-tpr spec: template: metadata: labels: app: {{ template "name" . }} release: {{ .Release.Name }} - name: {{ template "fullname" . }}-delete-tprs + name: {{ template "fullname" . }}-delete-servicemonitor-tpr spec: containers: - name: hyperkube diff --git a/prometheus-operator/templates/get-alertmanager-tpr-job.yaml b/prometheus-operator/templates/get-alertmanager-tpr-job.yaml index c14f83e5..b803d31b 100644 --- a/prometheus-operator/templates/get-alertmanager-tpr-job.yaml +++ b/prometheus-operator/templates/get-alertmanager-tpr-job.yaml @@ -8,14 +8,14 @@ metadata: chart: {{ .Chart.Name }}-{{ .Chart.Version }} heritage: {{ .Release.Service }} release: {{ .Release.Name }} - name: {{ template "fullname" . }}-get-tprs + name: {{ template "fullname" . }}-get-alertmanager-tpr spec: template: metadata: labels: app: {{ template "name" . }} release: {{ .Release.Name }} - name: {{ template "fullname" . }}-get-tprs + name: {{ template "fullname" . 
}}-get-alertmanager-tpr spec: containers: - name: hyperkube diff --git a/prometheus-operator/templates/get-prometheus-tpr-job.yaml b/prometheus-operator/templates/get-prometheus-tpr-job.yaml index 1b7c3b79..4f79aff6 100644 --- a/prometheus-operator/templates/get-prometheus-tpr-job.yaml +++ b/prometheus-operator/templates/get-prometheus-tpr-job.yaml @@ -8,14 +8,14 @@ metadata: chart: {{ .Chart.Name }}-{{ .Chart.Version }} heritage: {{ .Release.Service }} release: {{ .Release.Name }} - name: {{ template "fullname" . }}-get-tprs + name: {{ template "fullname" . }}-get-prometheus-tpr spec: template: metadata: labels: app: {{ template "name" . }} release: {{ .Release.Name }} - name: {{ template "fullname" . }}-get-tprs + name: {{ template "fullname" . }}-get-prometheus-tpr spec: containers: - name: hyperkube diff --git a/prometheus-operator/templates/get-servicemonitor-tpr-job.yaml b/prometheus-operator/templates/get-servicemonitor-tpr-job.yaml index 021608d4..e001efbb 100644 --- a/prometheus-operator/templates/get-servicemonitor-tpr-job.yaml +++ b/prometheus-operator/templates/get-servicemonitor-tpr-job.yaml @@ -8,14 +8,14 @@ metadata: chart: {{ .Chart.Name }}-{{ .Chart.Version }} heritage: {{ .Release.Service }} release: {{ .Release.Name }} - name: {{ template "fullname" . }}-get-tprs + name: {{ template "fullname" . }}-get-servicemonitor-tpr spec: template: metadata: labels: app: {{ template "name" . }} release: {{ .Release.Name }} - name: {{ template "fullname" . }}-get-tprs + name: {{ template "fullname" . 
}}-get-servicemonitor-tpr spec: containers: - name: hyperkube From ecced6ff7b10f589f7b157469b8524b86aa12d59 Mon Sep 17 00:00:00 2001 From: Michael Goodness Date: Mon, 30 Jan 2017 20:16:27 -0600 Subject: [PATCH 12/41] Cleanup services --- .../delete-alertmanager-svc-job.yaml | 25 +++++++++++++++++++ .../templates/delete-prometheus-svc-job.yaml | 25 +++++++++++++++++++ 2 files changed, 50 insertions(+) create mode 100644 prometheus-operator/templates/delete-alertmanager-svc-job.yaml create mode 100644 prometheus-operator/templates/delete-prometheus-svc-job.yaml diff --git a/prometheus-operator/templates/delete-alertmanager-svc-job.yaml b/prometheus-operator/templates/delete-alertmanager-svc-job.yaml new file mode 100644 index 00000000..200ef6c9 --- /dev/null +++ b/prometheus-operator/templates/delete-alertmanager-svc-job.yaml @@ -0,0 +1,25 @@ +apiVersion: batch/v1 +kind: Job +metadata: + annotations: + helm.sh/hook: post-delete + labels: + app: {{ template "name" . }} + chart: {{ .Chart.Name }}-{{ .Chart.Version }} + heritage: {{ .Release.Service }} + release: {{ .Release.Name }} + name: {{ template "fullname" . }}-delete-alertmanager-svc +spec: + template: + metadata: + labels: + app: {{ template "name" . }} + release: {{ .Release.Name }} + name: {{ template "fullname" . 
}}-delete-alertmanager-svc + spec: + containers: + - name: hyperkube + image: "{{ .Values.global.hyperkube.repository }}:{{ .Values.global.hyperkube.tag }}" + imagePullPolicy: "{{ .Values.global.hyperkube.pullPolicy }}" + command: ["./kubectl","-n","{{ .Release.Namespace }}","delete","svc/alertmanager"] + restartPolicy: Never diff --git a/prometheus-operator/templates/delete-prometheus-svc-job.yaml b/prometheus-operator/templates/delete-prometheus-svc-job.yaml new file mode 100644 index 00000000..74c125cf --- /dev/null +++ b/prometheus-operator/templates/delete-prometheus-svc-job.yaml @@ -0,0 +1,25 @@ +apiVersion: batch/v1 +kind: Job +metadata: + annotations: + helm.sh/hook: post-delete + labels: + app: {{ template "name" . }} + chart: {{ .Chart.Name }}-{{ .Chart.Version }} + heritage: {{ .Release.Service }} + release: {{ .Release.Name }} + name: {{ template "fullname" . }}-delete-prometheus-svc +spec: + template: + metadata: + labels: + app: {{ template "name" . }} + release: {{ .Release.Name }} + name: {{ template "fullname" . 
}}-delete-prometheus-svc + spec: + containers: + - name: hyperkube + image: "{{ .Values.global.hyperkube.repository }}:{{ .Values.global.hyperkube.tag }}" + imagePullPolicy: "{{ .Values.global.hyperkube.pullPolicy }}" + command: ["./kubectl","-n","{{ .Release.Namespace }}","delete","svc/prometheus"] + restartPolicy: Never From 4b5b40ac9e6870ac34f1a259601008765a54ca7b Mon Sep 17 00:00:00 2001 From: Michael Goodness Date: Mon, 30 Jan 2017 20:17:01 -0600 Subject: [PATCH 13/41] Hardcoded alertmanager & prometheus names --- Makefile | 6 ++++ .../grafana/all-nodes-dashboard.json | 30 ++++++++--------- .../grafana/deployments-dashboard.json | 28 ++++++++-------- .../grafana/kubernetes-pods-dashboard.json | 20 +++++------ kube-prometheus/grafana/node-dashboard.json | 26 +++++++-------- kube-prometheus/grafana/prometheus-stats.json | 22 ++++++------- kube-prometheus/templates/NOTES.txt | 8 ++--- .../templates/alertmanager-configmap.yaml | 4 +-- .../templates/alertmanager-ingress.yaml | 4 +-- .../templates/alertmanager-service.yaml | 6 ++-- .../templates/delete-alertmanager-job.yaml | 8 ++--- .../templates/delete-prometheus-job.yaml | 8 ++--- kube-prometheus/templates/get-tprs-job.yaml | 33 ------------------- .../templates/grafana-configmap.yaml | 4 +-- .../templates/prometheus-configmap.yaml | 8 ++--- .../templates/prometheus-ingress.yaml | 4 +-- .../templates/prometheus-rules.yaml | 4 +-- .../templates/prometheus-service.yaml | 6 ++-- kube-prometheus/values.yaml | 10 ------ 19 files changed, 97 insertions(+), 142 deletions(-) delete mode 100644 kube-prometheus/templates/get-tprs-job.yaml diff --git a/Makefile b/Makefile index fd9ab7a2..b15188e0 100644 --- a/Makefile +++ b/Makefile @@ -1,8 +1,14 @@ kube-prometheus: kube-prometheus-*.tgz + +kube-prometheus-*.tgz: + helm dep update kube-prometheus helm lint kube-prometheus helm package kube-prometheus prometheus-operator: prometheus-operator-*.tgz + +prometheus-operator-*.tgz: + helm dep update prometheus-operator helm lint 
prometheus-operator helm package prometheus-operator diff --git a/kube-prometheus/grafana/all-nodes-dashboard.json b/kube-prometheus/grafana/all-nodes-dashboard.json index 618c883c..e66e1797 100644 --- a/kube-prometheus/grafana/all-nodes-dashboard.json +++ b/kube-prometheus/grafana/all-nodes-dashboard.json @@ -2,8 +2,8 @@ "dashboard": { "__inputs": [ { - "name": "DS_PROMETHEUS", - "label": "prometheus", + "name": "DS_PROMETHEUS-K8S", + "label": "prometheus-k8s", "description": "", "type": "datasource", "pluginId": "prometheus", @@ -56,7 +56,7 @@ "alerting": {}, "aliasColors": {}, "bars": false, - "datasource": "${DS_PROMETHEUS}", + "datasource": "${DS_PROMETHEUS-K8S}", "editable": true, "error": false, "fill": 1, @@ -88,7 +88,7 @@ "expr": "sum(rate(node_cpu{mode=\"idle\"}[2m])) * 100", "hide": false, "intervalFactor": 10, - "legendFormat": "", + "legendFormat": "idle cpu", "refId": "A", "step": 50 } @@ -133,7 +133,7 @@ "alerting": {}, "aliasColors": {}, "bars": false, - "datasource": "${DS_PROMETHEUS}", + "datasource": "${DS_PROMETHEUS-K8S}", "editable": true, "error": false, "fill": 1, @@ -238,7 +238,7 @@ "alerting": {}, "aliasColors": {}, "bars": false, - "datasource": "${DS_PROMETHEUS}", + "datasource": "${DS_PROMETHEUS-K8S}", "editable": true, "error": false, "fill": 1, @@ -326,7 +326,7 @@ "rgba(237, 129, 40, 0.89)", "rgba(50, 172, 45, 0.97)" ], - "datasource": "${DS_PROMETHEUS}", + "datasource": "${DS_PROMETHEUS-K8S}", "editable": true, "error": false, "format": "percent", @@ -410,7 +410,7 @@ "alerting": {}, "aliasColors": {}, "bars": false, - "datasource": "${DS_PROMETHEUS}", + "datasource": "${DS_PROMETHEUS-K8S}", "editable": true, "error": false, "fill": 1, @@ -520,7 +520,7 @@ "rgba(237, 129, 40, 0.89)", "rgba(50, 172, 45, 0.97)" ], - "datasource": "${DS_PROMETHEUS}", + "datasource": "${DS_PROMETHEUS-K8S}", "editable": true, "error": false, "format": "percentunit", @@ -604,7 +604,7 @@ "alerting": {}, "aliasColors": {}, "bars": false, - "datasource": 
"${DS_PROMETHEUS}", + "datasource": "${DS_PROMETHEUS-K8S}", "editable": true, "error": false, "fill": 1, @@ -641,7 +641,7 @@ "expr": "sum(irate(node_network_receive_bytes{device!~\"lo\"}[5m]))", "hide": false, "intervalFactor": 2, - "legendFormat": "", + "legendFormat": "bytes received", "refId": "A", "step": 10, "target": "" @@ -687,7 +687,7 @@ "alerting": {}, "aliasColors": {}, "bars": false, - "datasource": "${DS_PROMETHEUS}", + "datasource": "${DS_PROMETHEUS-K8S}", "editable": true, "error": false, "fill": 1, @@ -724,7 +724,7 @@ "expr": "sum(irate(node_network_transmit_bytes{device!~\"lo\"}[5m]))", "hide": false, "intervalFactor": 2, - "legendFormat": "", + "legendFormat": "bytes transmitted", "refId": "B", "step": 10, "target": "" @@ -818,10 +818,10 @@ }, "inputs": [ { - "name": "DS_PROMETHEUS", + "name": "DS_PROMETHEUS-K8S", "pluginId": "prometheus", "type": "datasource", - "value": "prometheus" + "value": "prometheus-k8s" } ], "overwrite": true diff --git a/kube-prometheus/grafana/deployments-dashboard.json b/kube-prometheus/grafana/deployments-dashboard.json index 832a363b..314ccf40 100644 --- a/kube-prometheus/grafana/deployments-dashboard.json +++ b/kube-prometheus/grafana/deployments-dashboard.json @@ -2,8 +2,8 @@ "dashboard": { "__inputs": [ { - "name": "DS_PROMETHEUS", - "label": "prometheus", + "name": "DS_PROMETHEUS-K8S", + "label": "prometheus-k8s", "description": "", "type": "datasource", "pluginId": "prometheus", @@ -60,7 +60,7 @@ "rgba(237, 129, 40, 0.89)", "rgba(50, 172, 45, 0.97)" ], - "datasource": "${DS_PROMETHEUS}", + "datasource": "${DS_PROMETHEUS-K8S}", "editable": true, "error": false, "format": "none", @@ -136,7 +136,7 @@ "rgba(237, 129, 40, 0.89)", "rgba(50, 172, 45, 0.97)" ], - "datasource": "${DS_PROMETHEUS}", + "datasource": "${DS_PROMETHEUS-K8S}", "editable": true, "error": false, "format": "none", @@ -212,7 +212,7 @@ "rgba(237, 129, 40, 0.89)", "rgba(50, 172, 45, 0.97)" ], - "datasource": "${DS_PROMETHEUS}", + "datasource": 
"${DS_PROMETHEUS-K8S}", "editable": true, "error": false, "format": "Bps", @@ -300,7 +300,7 @@ "rgba(237, 129, 40, 0.89)", "rgba(50, 172, 45, 0.97)" ], - "datasource": "${DS_PROMETHEUS}", + "datasource": "${DS_PROMETHEUS-K8S}", "decimals": null, "editable": true, "error": false, @@ -378,7 +378,7 @@ "rgba(237, 129, 40, 0.89)", "rgba(50, 172, 45, 0.97)" ], - "datasource": "${DS_PROMETHEUS}", + "datasource": "${DS_PROMETHEUS-K8S}", "editable": true, "error": false, "format": "none", @@ -454,7 +454,7 @@ "rgba(237, 129, 40, 0.89)", "rgba(50, 172, 45, 0.97)" ], - "datasource": "${DS_PROMETHEUS}", + "datasource": "${DS_PROMETHEUS-K8S}", "editable": true, "error": false, "format": "none", @@ -531,7 +531,7 @@ "rgba(237, 129, 40, 0.89)", "rgba(50, 172, 45, 0.97)" ], - "datasource": "${DS_PROMETHEUS}", + "datasource": "${DS_PROMETHEUS-K8S}", "editable": true, "error": false, "format": "none", @@ -614,7 +614,7 @@ { "aliasColors": {}, "bars": false, - "datasource": "${DS_PROMETHEUS}", + "datasource": "${DS_PROMETHEUS-K8S}", "editable": true, "error": false, "fill": 1, @@ -733,7 +733,7 @@ { "allValue": ".*", "current": {}, - "datasource": "${DS_PROMETHEUS}", + "datasource": "${DS_PROMETHEUS-K8S}", "hide": 0, "includeAll": false, "label": "Namespace", @@ -753,7 +753,7 @@ { "allValue": null, "current": {}, - "datasource": "${DS_PROMETHEUS}", + "datasource": "${DS_PROMETHEUS-K8S}", "hide": 0, "includeAll": false, "label": "Deployments", @@ -807,10 +807,10 @@ }, "inputs": [ { - "name": "DS_PROMETHEUS", + "name": "DS_PROMETHEUS-K8S", "pluginId": "prometheus", "type": "datasource", - "value": "prometheus" + "value": "prometheus-k8s" } ], "overwrite": true diff --git a/kube-prometheus/grafana/kubernetes-pods-dashboard.json b/kube-prometheus/grafana/kubernetes-pods-dashboard.json index eea9aa22..182aa2e6 100644 --- a/kube-prometheus/grafana/kubernetes-pods-dashboard.json +++ b/kube-prometheus/grafana/kubernetes-pods-dashboard.json @@ -2,8 +2,8 @@ "dashboard": { "__inputs": [ { - "name": 
"DS_PROMETHEUS", - "label": "prometheus", + "name": "DS_PROMETHEUS-K8S", + "label": "prometheus-k8s", "description": "", "type": "datasource", "pluginId": "prometheus", @@ -48,7 +48,7 @@ { "aliasColors": {}, "bars": false, - "datasource": "${DS_PROMETHEUS}", + "datasource": "${DS_PROMETHEUS-K8S}", "editable": true, "error": false, "fill": 1, @@ -148,7 +148,7 @@ { "aliasColors": {}, "bars": false, - "datasource": "${DS_PROMETHEUS}", + "datasource": "${DS_PROMETHEUS-K8S}", "editable": true, "error": false, "fill": 1, @@ -237,7 +237,7 @@ { "aliasColors": {}, "bars": false, - "datasource": "${DS_PROMETHEUS}", + "datasource": "${DS_PROMETHEUS-K8S}", "editable": true, "error": false, "fill": 1, @@ -328,7 +328,7 @@ { "allValue": ".*", "current": {}, - "datasource": "${DS_PROMETHEUS}", + "datasource": "${DS_PROMETHEUS-K8S}", "hide": 0, "includeAll": true, "label": "Namespace", @@ -348,7 +348,7 @@ { "allValue": null, "current": {}, - "datasource": "${DS_PROMETHEUS}", + "datasource": "${DS_PROMETHEUS-K8S}", "hide": 0, "includeAll": false, "label": "Pod", @@ -368,7 +368,7 @@ { "allValue": ".*", "current": {}, - "datasource": "${DS_PROMETHEUS}", + "datasource": "${DS_PROMETHEUS-K8S}", "hide": 0, "includeAll": true, "label": "Container", @@ -422,10 +422,10 @@ }, "inputs": [ { - "name": "DS_PROMETHEUS", + "name": "DS_PROMETHEUS-K8S", "pluginId": "prometheus", "type": "datasource", - "value": "prometheus" + "value": "prometheus-k8s" } ], "overwrite": true diff --git a/kube-prometheus/grafana/node-dashboard.json b/kube-prometheus/grafana/node-dashboard.json index c1a507a0..28e2be7a 100644 --- a/kube-prometheus/grafana/node-dashboard.json +++ b/kube-prometheus/grafana/node-dashboard.json @@ -2,8 +2,8 @@ "dashboard": { "__inputs": [ { - "name": "DS_PROMETHEUS", - "label": "prometheus", + "name": "DS_PROMETHEUS-K8S", + "label": "prometheus-k8s", "description": "", "type": "datasource", "pluginId": "prometheus", @@ -56,7 +56,7 @@ "alerting": {}, "aliasColors": {}, "bars": false, - 
"datasource": "${DS_PROMETHEUS}", + "datasource": "${DS_PROMETHEUS-K8S}", "editable": true, "error": false, "fill": 1, @@ -133,7 +133,7 @@ "alerting": {}, "aliasColors": {}, "bars": false, - "datasource": "${DS_PROMETHEUS}", + "datasource": "${DS_PROMETHEUS-K8S}", "editable": true, "error": false, "fill": 1, @@ -238,7 +238,7 @@ "alerting": {}, "aliasColors": {}, "bars": false, - "datasource": "${DS_PROMETHEUS}", + "datasource": "${DS_PROMETHEUS-K8S}", "editable": true, "error": false, "fill": 1, @@ -326,7 +326,7 @@ "rgba(237, 129, 40, 0.89)", "rgba(50, 172, 45, 0.97)" ], - "datasource": "${DS_PROMETHEUS}", + "datasource": "${DS_PROMETHEUS-K8S}", "editable": true, "error": false, "format": "percent", @@ -410,7 +410,7 @@ "alerting": {}, "aliasColors": {}, "bars": false, - "datasource": "${DS_PROMETHEUS}", + "datasource": "${DS_PROMETHEUS-K8S}", "editable": true, "error": false, "fill": 1, @@ -520,7 +520,7 @@ "rgba(237, 129, 40, 0.89)", "rgba(50, 172, 45, 0.97)" ], - "datasource": "${DS_PROMETHEUS}", + "datasource": "${DS_PROMETHEUS-K8S}", "editable": true, "error": false, "format": "percentunit", @@ -604,7 +604,7 @@ "alerting": {}, "aliasColors": {}, "bars": false, - "datasource": "${DS_PROMETHEUS}", + "datasource": "${DS_PROMETHEUS-K8S}", "editable": true, "error": false, "fill": 1, @@ -687,7 +687,7 @@ "alerting": {}, "aliasColors": {}, "bars": false, - "datasource": "${DS_PROMETHEUS}", + "datasource": "${DS_PROMETHEUS-K8S}", "editable": true, "error": false, "fill": 1, @@ -785,7 +785,7 @@ { "allValue": null, "current": {}, - "datasource": "${DS_PROMETHEUS}", + "datasource": "${DS_PROMETHEUS-K8S}", "hide": 0, "includeAll": false, "label": null, @@ -839,10 +839,10 @@ }, "inputs": [ { - "name": "DS_PROMETHEUS", + "name": "DS_PROMETHEUS-K8S", "pluginId": "prometheus", "type": "datasource", - "value": "prometheus" + "value": "prometheus-k8s" } ], "overwrite": true diff --git a/kube-prometheus/grafana/prometheus-stats.json b/kube-prometheus/grafana/prometheus-stats.json 
index 61911dc4..f2690b31 100644 --- a/kube-prometheus/grafana/prometheus-stats.json +++ b/kube-prometheus/grafana/prometheus-stats.json @@ -2,8 +2,8 @@ "dashboard": { "__inputs": [ { - "name": "DS_PROMETHEUS", - "label": "prometheus", + "name": "DS_PROMETHEUS-K8S", + "label": "prometheus-k8s", "description": "", "type": "datasource", "pluginId": "prometheus", @@ -85,7 +85,7 @@ "rgba(237, 129, 40, 0.89)", "rgba(50, 172, 45, 0.97)" ], - "datasource": "${DS_PROMETHEUS}", + "datasource": "${DS_PROMETHEUS-K8S}", "decimals": 1, "editable": true, "error": false, @@ -162,7 +162,7 @@ "rgba(237, 129, 40, 0.89)", "rgba(245, 54, 54, 0.9)" ], - "datasource": "${DS_PROMETHEUS}", + "datasource": "${DS_PROMETHEUS-K8S}", "editable": true, "error": false, "format": "none", @@ -232,7 +232,7 @@ "rgba(237, 129, 40, 0.89)", "rgba(245, 54, 54, 0.9)" ], - "datasource": "${DS_PROMETHEUS}", + "datasource": "${DS_PROMETHEUS-K8S}", "editable": true, "error": false, "format": "none", @@ -330,7 +330,7 @@ "{instance=\"localhost:9090\",job=\"prometheus\"}": "#C15C17" }, "bars": false, - "datasource": "${DS_PROMETHEUS}", + "datasource": "${DS_PROMETHEUS-K8S}", "editable": true, "error": false, "fill": 1, @@ -432,7 +432,7 @@ "{instance=\"localhost:9090\",interval=\"5s\",job=\"prometheus\"}": "#F9BA8F" }, "bars": false, - "datasource": "${DS_PROMETHEUS}", + "datasource": "${DS_PROMETHEUS-K8S}", "editable": true, "error": false, "fill": 1, @@ -504,7 +504,7 @@ { "aliasColors": {}, "bars": false, - "datasource": "${DS_PROMETHEUS}", + "datasource": "${DS_PROMETHEUS-K8S}", "editable": true, "error": false, "fill": 1, @@ -603,7 +603,7 @@ { "aliasColors": {}, "bars": false, - "datasource": "${DS_PROMETHEUS}", + "datasource": "${DS_PROMETHEUS-K8S}", "decimals": null, "editable": true, "error": false, @@ -743,10 +743,10 @@ }, "inputs": [ { - "name": "DS_PROMETHEUS", + "name": "DS_PROMETHEUS-K8S", "pluginId": "prometheus", "type": "datasource", - "value": "prometheus" + "value": "prometheus-k8s" } ], 
"overwrite": true diff --git a/kube-prometheus/templates/NOTES.txt b/kube-prometheus/templates/NOTES.txt index 88fa073a..1a18eee6 100644 --- a/kube-prometheus/templates/NOTES.txt +++ b/kube-prometheus/templates/NOTES.txt @@ -6,13 +6,13 @@ apiVersion: monitoring.coreos.com/v1alpha1 kind: Alertmanager metadata: labels: - alertmanager: "{{ .Values.alertmanager.name }}" + alertmanager: main app: {{ template "name" . }} chart: {{ .Chart.Name }}-{{ .Chart.Version }} component: alertmanager heritage: {{ .Release.Service }} release: {{ .Release.Name }} - name: alertmanager-{{ .Values.alertmanager.name }} + name: alertmanager-main namespace: {{ .Release.Namespace }} spec: replicas: {{ .Values.alertmanager.replicaCount }} @@ -31,9 +31,9 @@ metadata: chart: {{ .Chart.Name }}-{{ .Chart.Version }} component: prometheus heritage: {{ .Release.Service }} - prometheus: "{{ .Values.prometheus.name }}" + prometheus: k8s release: {{ .Release.Name }} - name: prometheus-{{ .Values.prometheus.name }} + name: prometheus-k8s namespace: {{ .Release.Namespace }} spec: resources: diff --git a/kube-prometheus/templates/alertmanager-configmap.yaml b/kube-prometheus/templates/alertmanager-configmap.yaml index b2c19086..d39511bc 100644 --- a/kube-prometheus/templates/alertmanager-configmap.yaml +++ b/kube-prometheus/templates/alertmanager-configmap.yaml @@ -2,13 +2,13 @@ apiVersion: v1 kind: ConfigMap metadata: labels: - alertmanager: "{{ .Values.alertmanager.name }}" + alertmanager: main app: {{ template "name" . 
}} chart: {{ .Chart.Name }}-{{ .Chart.Version }} component: alertmanager heritage: {{ .Release.Service }} release: {{ .Release.Name }} - name: alertmanager-{{ .Values.alertmanager.name }} + name: alertmanager-main data: alertmanager.yaml: |- global: diff --git a/kube-prometheus/templates/alertmanager-ingress.yaml b/kube-prometheus/templates/alertmanager-ingress.yaml index 72529433..87dbb0f3 100644 --- a/kube-prometheus/templates/alertmanager-ingress.yaml +++ b/kube-prometheus/templates/alertmanager-ingress.yaml @@ -9,7 +9,7 @@ metadata: {{ toYaml .annotations | indent 4 }} {{- end }} labels: - alertmanager: "{{ $root.Values.alertmanager.name }}" + alertmanager: main app: {{ template "name" $root }} chart: {{ $root.Chart.Name }}-{{ $root.Chart.Version }} component: alertmanager @@ -24,7 +24,7 @@ spec: paths: - path: / backend: - serviceName: alertmanager-{{ $root.Values.alertmanager.name }} + serviceName: alertmanager-main servicePort: 9093 {{- end }} {{- if .tls }} diff --git a/kube-prometheus/templates/alertmanager-service.yaml b/kube-prometheus/templates/alertmanager-service.yaml index e5781006..74ce3013 100644 --- a/kube-prometheus/templates/alertmanager-service.yaml +++ b/kube-prometheus/templates/alertmanager-service.yaml @@ -6,13 +6,13 @@ metadata: {{ toYaml .Values.alertmanager.service.annotations | indent 4 }} {{- end }} labels: - alertmanager: "{{ .Values.alertmanager.name }}" + alertmanager: main app: {{ template "name" . 
}} chart: {{ .Chart.Name }}-{{ .Chart.Version }} component: alertmanager heritage: {{ .Release.Service }} release: {{ .Release.Name }} - name: alertmanager-{{ .Values.alertmanager.name }} + name: alertmanager-main spec: ports: - name: http @@ -23,5 +23,5 @@ spec: targetPort: 9093 protocol: TCP selector: - alertmanager: alertmanager-{{ .Values.alertmanager.name }} + alertmanager: alertmanager-main type: "{{ .Values.alertmanager.service.type }}" diff --git a/kube-prometheus/templates/delete-alertmanager-job.yaml b/kube-prometheus/templates/delete-alertmanager-job.yaml index b6b7984f..00e0af03 100644 --- a/kube-prometheus/templates/delete-alertmanager-job.yaml +++ b/kube-prometheus/templates/delete-alertmanager-job.yaml @@ -19,13 +19,9 @@ spec: name: {{ template "fullname" . }}-delete-alertmanager spec: containers: - - name: delete-service + - name: hyperkube image: "{{ .Values.global.hyperkube.repository }}:{{ .Values.global.hyperkube.tag }}" imagePullPolicy: "{{ .Values.global.hyperkube.pullPolicy }}" - command: ["./kubectl","-n","{{ .Release.Namespace }}","delete","svc/alertmanager"] - - name: delete-statefulset - image: "{{ .Values.global.hyperkube.repository }}:{{ .Values.global.hyperkube.tag }}" - imagePullPolicy: "{{ .Values.global.hyperkube.pullPolicy }}" - command: ["./kubectl","-n","{{ .Release.Namespace }}","delete","statefulsets/alertmanager-{{ .Values.alertmanager.name }}"] + command: ["./kubectl","-n","{{ .Release.Namespace }}","delete","alertmanager/alertmanager-main"] restartPolicy: Never {{- end }} diff --git a/kube-prometheus/templates/delete-prometheus-job.yaml b/kube-prometheus/templates/delete-prometheus-job.yaml index 9b8245ce..b499e2fc 100644 --- a/kube-prometheus/templates/delete-prometheus-job.yaml +++ b/kube-prometheus/templates/delete-prometheus-job.yaml @@ -19,13 +19,9 @@ spec: name: {{ template "fullname" . 
}}-delete-prometheus spec: containers: - - name: delete-service + - name: hyperkube image: "{{ .Values.global.hyperkube.repository }}:{{ .Values.global.hyperkube.tag }}" imagePullPolicy: "{{ .Values.global.hyperkube.pullPolicy }}" - command: ["./kubectl","-n","{{ .Release.Namespace }}","delete","svc/prometheus"] - - name: delete-statefulset - image: "{{ .Values.global.hyperkube.repository }}:{{ .Values.global.hyperkube.tag }}" - imagePullPolicy: "{{ .Values.global.hyperkube.pullPolicy }}" - command: ["./kubectl","-n","{{ .Release.Namespace }}","delete","statefulsets/prometheus-{{ .Values.prometheus.name }}"] + command: ["./kubectl","-n","{{ .Release.Namespace }}","delete","prometheus/prometheus-k8s"] restartPolicy: Never {{- end }} diff --git a/kube-prometheus/templates/get-tprs-job.yaml b/kube-prometheus/templates/get-tprs-job.yaml deleted file mode 100644 index eec5aa98..00000000 --- a/kube-prometheus/templates/get-tprs-job.yaml +++ /dev/null @@ -1,33 +0,0 @@ -apiVersion: batch/v1 -kind: Job -metadata: - annotations: - helm.sh/hook: pre-install - labels: - app: {{ template "name" . }} - chart: {{ .Chart.Name }}-{{ .Chart.Version }} - heritage: {{ .Release.Service }} - release: {{ .Release.Name }} - name: {{ template "fullname" . }}-get-tprs -spec: - template: - metadata: - labels: - app: {{ template "name" . }} - release: {{ .Release.Name }} - name: {{ template "fullname" . 
}}-get-tprs - spec: - containers: - - name: get-alertmanager-tpr - image: "{{ .Values.global.hyperkube.repository }}:{{ .Values.global.hyperkube.tag }}" - imagePullPolicy: "{{ .Values.global.hyperkube.pullPolicy }}" - command: ["./kubectl","-n","{{ .Release.Namespace }}","get","alertmanager"] - - name: get-prometheus-tpr - image: "{{ .Values.global.hyperkube.repository }}:{{ .Values.global.hyperkube.tag }}" - imagePullPolicy: "{{ .Values.global.hyperkube.pullPolicy }}" - command: ["./kubectl","-n","{{ .Release.Namespace }}","get","prometheus"] - - name: get-servicemonitor-tpr - image: "{{ .Values.global.hyperkube.repository }}:{{ .Values.global.hyperkube.tag }}" - imagePullPolicy: "{{ .Values.global.hyperkube.pullPolicy }}" - command: ["./kubectl","-n","{{ .Release.Namespace }}","get","servicemonitor"] - restartPolicy: OnFailure diff --git a/kube-prometheus/templates/grafana-configmap.yaml b/kube-prometheus/templates/grafana-configmap.yaml index b861f721..fa9ff9e5 100644 --- a/kube-prometheus/templates/grafana-configmap.yaml +++ b/kube-prometheus/templates/grafana-configmap.yaml @@ -13,8 +13,8 @@ data: { "access": "proxy", "basicAuth": false, - "name": "prometheus", + "name": "prometheus-k8s", "type": "prometheus", - "url": "http://prometheus-{{ .Values.prometheus.name }}.{{ .Release.Namespace }}.svc:9090" + "url": "http://prometheus-k8s.{{ .Release.Namespace }}.svc:9090" } {{ (.Files.Glob "grafana/*.json").AsConfig | indent 2 }} diff --git a/kube-prometheus/templates/prometheus-configmap.yaml b/kube-prometheus/templates/prometheus-configmap.yaml index 6c0312cd..33d0b040 100644 --- a/kube-prometheus/templates/prometheus-configmap.yaml +++ b/kube-prometheus/templates/prometheus-configmap.yaml @@ -6,9 +6,9 @@ metadata: chart: {{ .Chart.Name }}-{{ .Chart.Version }} component: prometheus heritage: {{ .Release.Service }} - prometheus: "{{ .Values.prometheus.name }}" + prometheus: k8s release: {{ .Release.Name }} - name: prometheus-{{ .Values.prometheus.name }} + name: 
prometheus-k8s data: prometheus.yaml: | alerting: @@ -17,7 +17,7 @@ data: - role: endpoints relabel_configs: - action: keep - regex: alertmanager-{{ .Values.alertmanager.name }} + regex: alertmanager-main source_labels: - __meta_kubernetes_service_name - action: keep @@ -84,7 +84,7 @@ data: - role: endpoints relabel_configs: - action: keep - regex: prometheus-{{ .Values.prometheus.name }}|{{ template "fullname" . }}-node-exporter|{{ template "fullname" . }}-kube-state-metrics + regex: prometheus-k8s|{{ template "fullname" . }}-node-exporter|{{ template "fullname" . }}-kube-state-metrics source_labels: [__meta_kubernetes_service_name] - action: replace source_labels: [__meta_kubernetes_service_name] diff --git a/kube-prometheus/templates/prometheus-ingress.yaml b/kube-prometheus/templates/prometheus-ingress.yaml index 0905410a..e308030c 100644 --- a/kube-prometheus/templates/prometheus-ingress.yaml +++ b/kube-prometheus/templates/prometheus-ingress.yaml @@ -13,7 +13,7 @@ metadata: chart: {{ $root.Chart.Name }}-{{ $root.Chart.Version }} component: prometheus heritage: {{ $root.Release.Service }} - prometheus: "{{ $root.Values.prometheus.name }}" + prometheus: k8s release: {{ $root.Release.Name }} name: {{ template "fullname" $root }}-prometheus-{{ .name }} spec: @@ -24,7 +24,7 @@ spec: paths: - path: / backend: - serviceName: prometheus-{{ $root.Values.prometheus.name }} + serviceName: prometheus-k8s servicePort: 9090 {{- end }} {{- if .tls }} diff --git a/kube-prometheus/templates/prometheus-rules.yaml b/kube-prometheus/templates/prometheus-rules.yaml index 1f73c413..d6032ff5 100644 --- a/kube-prometheus/templates/prometheus-rules.yaml +++ b/kube-prometheus/templates/prometheus-rules.yaml @@ -6,8 +6,8 @@ metadata: chart: {{ .Chart.Name }}-{{ .Chart.Version }} component: prometheus heritage: {{ .Release.Service }} - prometheus: "{{ .Values.prometheus.name }}" + prometheus: k8s release: {{ .Release.Name }} - name: prometheus-{{ .Values.prometheus.name }}-rules + name: 
prometheus-k8s-rules data: {{ (.Files.Glob "rules/*.rules").AsConfig | indent 2 }} diff --git a/kube-prometheus/templates/prometheus-service.yaml b/kube-prometheus/templates/prometheus-service.yaml index 3b1be601..39e6ec1d 100644 --- a/kube-prometheus/templates/prometheus-service.yaml +++ b/kube-prometheus/templates/prometheus-service.yaml @@ -10,9 +10,9 @@ metadata: chart: {{ .Chart.Name }}-{{ .Chart.Version }} component: prometheus heritage: {{ .Release.Service }} - prometheus: "{{ .Values.prometheus.name }}" + prometheus: k8s release: {{ .Release.Name }} - name: prometheus-{{ .Values.prometheus.name }} + name: prometheus-k8s spec: ports: - name: http @@ -23,5 +23,5 @@ spec: targetPort: 9090 protocol: TCP selector: - prometheus: prometheus-{{ .Values.prometheus.name }} + prometheus: prometheus-k8s type: "{{ .Values.prometheus.service.type }}" diff --git a/kube-prometheus/values.yaml b/kube-prometheus/values.yaml index 26542e50..7619ca62 100644 --- a/kube-prometheus/values.yaml +++ b/kube-prometheus/values.yaml @@ -9,11 +9,6 @@ global: pullPolicy: IfNotPresent alertmanager: - ## name to use when creating Alertmanager StatefulSet & Service - ## Ex: 'main' will create resources named 'alertmanager-main' - ## - name: main - ## if true, delete Alertmanager StatefulSet & Service when release is deleted ## cleanup: true @@ -198,11 +193,6 @@ nodeExporter: nodePort: 9100 prometheus: - ## name to use when creating Prometheus StatefulSet & Service - ## Ex: 'k8s' will create resources named 'prometheus-k8s' - ## - name: k8s - ## if true, delete Prometheus StatefulSet & Service when release is deleted ## cleanup: true From 052da0361c06e6ba2b95c2079d728fa73fbf4beb Mon Sep 17 00:00:00 2001 From: Michael Goodness Date: Wed, 15 Feb 2017 22:17:35 -0600 Subject: [PATCH 14/41] prometheus-operator v0.4.0 --- prometheus-operator/Chart.yaml | 5 ++-- prometheus-operator/templates/NOTES.txt | 2 +- .../delete-alertmanager-svc-job.yaml | 25 ------------------- 
.../templates/delete-prometheus-svc-job.yaml | 25 ------------------- prometheus-operator/templates/deployment.yaml | 4 +-- prometheus-operator/values.yaml | 11 ++++---- 6 files changed, 10 insertions(+), 62 deletions(-) delete mode 100644 prometheus-operator/templates/delete-alertmanager-svc-job.yaml delete mode 100644 prometheus-operator/templates/delete-prometheus-svc-job.yaml diff --git a/prometheus-operator/Chart.yaml b/prometheus-operator/Chart.yaml index 4fa7696d..8a0f09af 100644 --- a/prometheus-operator/Chart.yaml +++ b/prometheus-operator/Chart.yaml @@ -1,9 +1,10 @@ apiVersion: v1 description: Provides easy monitoring definitions for Kubernetes services, and deployment and management of Prometheus instances. engine: gotpl -home: https://github.com/coreos/prometheus-operator maintainers: - name: Michael Goodness email: mgoodness@gmail.com name: prometheus-operator -version: 0.1.3 +sources: + - https://github.com/coreos/prometheus-operator +version: 0.4.0-1 diff --git a/prometheus-operator/templates/NOTES.txt b/prometheus-operator/templates/NOTES.txt index 42b5cd73..ef0d97e4 100644 --- a/prometheus-operator/templates/NOTES.txt +++ b/prometheus-operator/templates/NOTES.txt @@ -3,4 +3,4 @@ The Prometheus Operator has been installed. Check its status by running: -l "app={{ template "name" . }},release={{ .Release.Name }}" Visit https://github.com/coreos/prometheus-operator for instructions on how -to create & configure Alertmanager and Prometheus instances using the Operator. +to create & configure AlertManager and Prometheus instances using the Operator. 
diff --git a/prometheus-operator/templates/delete-alertmanager-svc-job.yaml b/prometheus-operator/templates/delete-alertmanager-svc-job.yaml deleted file mode 100644 index 200ef6c9..00000000 --- a/prometheus-operator/templates/delete-alertmanager-svc-job.yaml +++ /dev/null @@ -1,25 +0,0 @@ -apiVersion: batch/v1 -kind: Job -metadata: - annotations: - helm.sh/hook: post-delete - labels: - app: {{ template "name" . }} - chart: {{ .Chart.Name }}-{{ .Chart.Version }} - heritage: {{ .Release.Service }} - release: {{ .Release.Name }} - name: {{ template "fullname" . }}-delete-alertmanager-svc -spec: - template: - metadata: - labels: - app: {{ template "name" . }} - release: {{ .Release.Name }} - name: {{ template "fullname" . }}-delete-alertmanager-svc - spec: - containers: - - name: hyperkube - image: "{{ .Values.global.hyperkube.repository }}:{{ .Values.global.hyperkube.tag }}" - imagePullPolicy: "{{ .Values.global.hyperkube.pullPolicy }}" - command: ["./kubectl","-n","{{ .Release.Namespace }}","delete","svc/alertmanager"] - restartPolicy: Never diff --git a/prometheus-operator/templates/delete-prometheus-svc-job.yaml b/prometheus-operator/templates/delete-prometheus-svc-job.yaml deleted file mode 100644 index 74c125cf..00000000 --- a/prometheus-operator/templates/delete-prometheus-svc-job.yaml +++ /dev/null @@ -1,25 +0,0 @@ -apiVersion: batch/v1 -kind: Job -metadata: - annotations: - helm.sh/hook: post-delete - labels: - app: {{ template "name" . }} - chart: {{ .Chart.Name }}-{{ .Chart.Version }} - heritage: {{ .Release.Service }} - release: {{ .Release.Name }} - name: {{ template "fullname" . }}-delete-prometheus-svc -spec: - template: - metadata: - labels: - app: {{ template "name" . }} - release: {{ .Release.Name }} - name: {{ template "fullname" . 
}}-delete-prometheus-svc - spec: - containers: - - name: hyperkube - image: "{{ .Values.global.hyperkube.repository }}:{{ .Values.global.hyperkube.tag }}" - imagePullPolicy: "{{ .Values.global.hyperkube.pullPolicy }}" - command: ["./kubectl","-n","{{ .Release.Namespace }}","delete","svc/prometheus"] - restartPolicy: Never diff --git a/prometheus-operator/templates/deployment.yaml b/prometheus-operator/templates/deployment.yaml index cbcf112c..29df063a 100644 --- a/prometheus-operator/templates/deployment.yaml +++ b/prometheus-operator/templates/deployment.yaml @@ -21,9 +21,7 @@ spec: - name: {{ template "name" . }} image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" imagePullPolicy: "{{ .Values.image.pullPolicy }}" - {{- if not .Values.sendAnalytics }} args: - - --analytics=false - {{- end }} + - --analytics={{ .Values.sendAnalytics }} resources: {{ toYaml .Values.resources | indent 12 }} diff --git a/prometheus-operator/values.yaml b/prometheus-operator/values.yaml index a1c5a6d7..8b565a75 100644 --- a/prometheus-operator/values.yaml +++ b/prometheus-operator/values.yaml @@ -1,25 +1,24 @@ global: - ## hyperkube image to use when getting/deleting ThirdPartyResources - ## created by prometheus-operator. 
+ ## Hyperkube image to use when getting/deleting ThirdPartyResources ## hyperkube: repository: quay.io/coreos/hyperkube tag: v1.5.2_coreos.1 pullPolicy: IfNotPresent -## prometheus-operator image +## Prometheus-operator image ## image: repository: quay.io/coreos/prometheus-operator - tag: v0.2.3 + tag: v0.4.0 pullPolicy: IfNotPresent -## if true, collect & send anonymous usage statistics +## If true, collect & send anonymous usage statistics ## Ref: https://github.com/coreos/prometheus-operator#installation ## sendAnalytics: true -## prometheus-operator resource limits & requests +## Prometheus-operator resource limits & requests ## Ref: https://kubernetes.io/docs/user-guide/compute-resources/ ## resources: From eff1967c07b5be6625a6e650a2fe9eafac9e760d Mon Sep 17 00:00:00 2001 From: Michael Goodness Date: Wed, 15 Feb 2017 22:17:59 -0600 Subject: [PATCH 15/41] standalone prometheus (TPR) chart --- prometheus/.helmignore | 21 ++++ prometheus/Chart.yaml | 10 ++ prometheus/templates/NOTES.txt | 1 + prometheus/templates/_helpers.tpl | 16 +++ prometheus/templates/prometheus-config.yaml | 15 +++ prometheus/templates/prometheus-ingress.yaml | 29 +++++ prometheus/templates/prometheus-rules.yaml | 12 ++ prometheus/templates/prometheus-service.yaml | 39 +++++++ prometheus/templates/prometheus.yaml | 41 +++++++ prometheus/values.yaml | 113 +++++++++++++++++++ 10 files changed, 297 insertions(+) create mode 100644 prometheus/.helmignore create mode 100644 prometheus/Chart.yaml create mode 100644 prometheus/templates/NOTES.txt create mode 100644 prometheus/templates/_helpers.tpl create mode 100644 prometheus/templates/prometheus-config.yaml create mode 100644 prometheus/templates/prometheus-ingress.yaml create mode 100644 prometheus/templates/prometheus-rules.yaml create mode 100644 prometheus/templates/prometheus-service.yaml create mode 100644 prometheus/templates/prometheus.yaml create mode 100644 prometheus/values.yaml diff --git a/prometheus/.helmignore 
b/prometheus/.helmignore new file mode 100644 index 00000000..f0c13194 --- /dev/null +++ b/prometheus/.helmignore @@ -0,0 +1,21 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*~ +# Various IDEs +.project +.idea/ +*.tmproj diff --git a/prometheus/Chart.yaml b/prometheus/Chart.yaml new file mode 100644 index 00000000..c471741d --- /dev/null +++ b/prometheus/Chart.yaml @@ -0,0 +1,10 @@ +apiVersion: v1 +description: Prometheus instance created by the CoreOS Prometheus Operator +engine: gotpl +maintainers: + - name: Michael Goodness + email: mgoodness@gmail.com +name: prometheus +sources: + - https://github.com/coreos/prometheus-operator +version: 0.4.0-1 diff --git a/prometheus/templates/NOTES.txt b/prometheus/templates/NOTES.txt new file mode 100644 index 00000000..66d02411 --- /dev/null +++ b/prometheus/templates/NOTES.txt @@ -0,0 +1 @@ +A new Prometheus instance has been created. diff --git a/prometheus/templates/_helpers.tpl b/prometheus/templates/_helpers.tpl new file mode 100644 index 00000000..f0d83d2e --- /dev/null +++ b/prometheus/templates/_helpers.tpl @@ -0,0 +1,16 @@ +{{/* vim: set filetype=mustache: */}} +{{/* +Expand the name of the chart. +*/}} +{{- define "name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). 
+*/}} +{{- define "fullname" -}} +{{- $name := default .Chart.Name .Values.nameOverride -}} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} +{{- end -}} diff --git a/prometheus/templates/prometheus-config.yaml b/prometheus/templates/prometheus-config.yaml new file mode 100644 index 00000000..750a0cec --- /dev/null +++ b/prometheus/templates/prometheus-config.yaml @@ -0,0 +1,15 @@ +{{- if not .Values.serviceMonitorSelector }} +apiVersion: v1 +kind: ConfigMap +metadata: + labels: + app: {{ template "name" . }} + chart: {{ .Chart.Name }}-{{ .Chart.Version }} + heritage: {{ .Release.Service }} + prometheus: {{ .Release.Name }} + release: {{ .Release.Name }} + name: prometheus-{{ .Release.Name }} +data: + prometheus.yaml: |- +{{ toYaml .Values.config | indent 4 }} +{{- end }} diff --git a/prometheus/templates/prometheus-ingress.yaml b/prometheus/templates/prometheus-ingress.yaml new file mode 100644 index 00000000..529898db --- /dev/null +++ b/prometheus/templates/prometheus-ingress.yaml @@ -0,0 +1,29 @@ +{{- if .Values.ingress.enabled }} +apiVersion: extensions/v1beta1 +kind: Ingress +metadata: +{{- if .Values.ingress.annotations }} + annotations: +{{ toYaml .Values.ingress.annotations | indent 4 }} +{{- end }} + labels: + app: {{ template "name" . }} + chart: {{ .Chart.Name }}-{{ .Chart.Version }} + heritage: {{ .Release.Service }} + prometheus: {{ .Release.Name }} + release: {{ .Release.Name }} + name: {{ template "fullname" . }} +spec: + rules: + - host: "{{ .Values.ingress.fqdn }}" + http: + paths: + - path: "{{ .Values.routePrefix }}" + backend: + serviceName: {{ template "fullname" . 
}} + servicePort: 9090 +{{- if .Values.ingress.tls }} + tls: +{{ toYaml .Values.ingress.tls | indent 4 }} +{{- end }} +{{- end }} diff --git a/prometheus/templates/prometheus-rules.yaml b/prometheus/templates/prometheus-rules.yaml new file mode 100644 index 00000000..47a0def6 --- /dev/null +++ b/prometheus/templates/prometheus-rules.yaml @@ -0,0 +1,12 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + labels: + app: {{ template "name" . }} + chart: {{ .Chart.Name }}-{{ .Chart.Version }} + heritage: {{ .Release.Service }} + prometheus: {{ .Release.Name }} + release: {{ .Release.Name }} + name: prometheus-{{ .Release.Name }}-rules +data: +{{ toYaml .Values.rules | indent 2 }} diff --git a/prometheus/templates/prometheus-service.yaml b/prometheus/templates/prometheus-service.yaml new file mode 100644 index 00000000..2b7b6527 --- /dev/null +++ b/prometheus/templates/prometheus-service.yaml @@ -0,0 +1,39 @@ +apiVersion: v1 +kind: Service +metadata: +{{- if .Values.service.annotations }} + annotations: +{{ toYaml .Values.service.annotations | indent 4 }} +{{- end }} + labels: + app: {{ template "name" . }} + chart: {{ .Chart.Name }}-{{ .Chart.Version }} + heritage: {{ .Release.Service }} + prometheus: {{ .Release.Name }} + release: {{ .Release.Name }} + name: {{ template "fullname" . 
}} +spec: + clusterIP: "{{ .Values.service.clusterIP }}" +{{- if .Values.service.externalIPs }} + externalIPs: +{{ toYaml .Values.service.externalIPs | indent 4 }} +{{- end }} +{{- if .Values.service.loadBalancerIP }} + loadBalancerIP: "{{ .Values.service.loadBalancerIP }}" +{{- end }} +{{- if .Values.service.loadBalancerSourceRanges }} + loadBalancerSourceRanges: +{{ toYaml .Values.service.loadBalancerSourceRanges | indent 4 }} +{{- end }} + ports: + - name: http + {{- if eq .Values.service.type "NodePort" }} + nodePort: {{ .Values.service.nodePort }} + {{- end }} + port: 9090 + targetPort: 9090 + protocol: TCP + selector: + app: prometheus + prometheus: {{ .Release.Name }} + type: "{{ .Values.service.type }}" diff --git a/prometheus/templates/prometheus.yaml b/prometheus/templates/prometheus.yaml new file mode 100644 index 00000000..1526b317 --- /dev/null +++ b/prometheus/templates/prometheus.yaml @@ -0,0 +1,41 @@ +apiVersion: monitoring.coreos.com/v1alpha1 +kind: Prometheus +metadata: + labels: + app: {{ template "name" . 
}} + chart: {{ .Chart.Name }}-{{ .Chart.Version }} + heritage: {{ .Release.Service }} + prometheus: {{ .Release.Name }} + release: {{ .Release.Name }} + name: {{ .Release.Name }} +spec: +{{- if .Values.alertingEndpoints }} + alerting: + alertmanagers: +{{ toYaml .Values.alertingEndpoints | indent 6 }} +{{- end }} + baseImage: "{{ .Values.image.repository }}" +{{- if .Values.externalUrl }} + externalUrl: "{{ .Values.externalUrl }}" +{{- end }} +{{- if .Values.nodeSelector }} + nodeSelector: +{{ toYaml .Values.nodeSelector | indent 4 }} +{{- end }} + paused: {{ .Values.paused }} + replicas: {{ .Values.replicaCount }} + resources: +{{ toYaml .Values.resources | indent 4 }} + retention: "{{ .Values.retention }}" +{{- if .Values.routePrefix }} + routePrefix: "{{ .Values.routePrefix }}" +{{- end }} +{{- if .Values.serviceMonitorSelector }} + serviceMonitorSelector: +{{ toYaml .Values.serviceMonitorSelector | indent 4 }} +{{- end }} +{{- if .Values.storageSpec }} + storage: +{{ toYaml .Values.storageSpec | indent 4 }} +{{- end }} + version: "{{ .Values.image.tag }}" diff --git a/prometheus/values.yaml b/prometheus/values.yaml new file mode 100644 index 00000000..c76e39fd --- /dev/null +++ b/prometheus/values.yaml @@ -0,0 +1,113 @@ +global: + ## Hyperkube image to use when getting & deleting ThirdPartyResources + ## + hyperkube: + repository: quay.io/coreos/hyperkube + tag: v1.5.2_coreos.1 + pullPolicy: IfNotPresent + +## AlertManagers to which alerts will be sent +## Ref: https://github.com/coreos/prometheus-operator/blob/master/Documentation/prometheus.md#alertmanagerendpoints +## +alertingEndpoints: [] +# - name: "" +# namespace: "" +# port: 9093 +# scheme: http + +config: {} + +## External URL at which Prometheus will be reachable +## +externalUrl: "" + +## Prometheus container image +## +image: + repository: quay.io/prometheus/prometheus + tag: v1.5.2 + +ingress: + ## If true, Prometheus Ingress will be created + ## + enabled: false + + ## Annotations for Prometheus 
Ingress + ## + annotations: {} + # kubernetes.io/ingress.class: nginx + # kubernetes.io/tls-acme: 'true' + + fqdn: "" + + ## TLS configuration for Prometheus Ingress + ## Secret must be manually created in the namespace + ## + tls: [] + # - secretName: prometheus-k8s-tls + # hosts: + # - prometheus.example.com + +## Node labels for Prometheus pod assignment +## Ref: https://kubernetes.io/docs/user-guide/node-selection/ +## +nodeSelector: {} + +## If true, the Operator won't process any Prometheus configuration changes +## +paused: false + +## Number of Prometheus replicas desired +## +replicaCount: 1 + +## How long to retain metrics +## +retention: 24h + +## Resource limits & requests +## Ref: https://kubernetes.io/docs/user-guide/compute-resources/ +## +resources: + requests: + memory: 400Mi + +## Prefix used to register routes, overriding externalUrl route. +## Useful for proxies that rewrite URLs. +## +routePrefix: "/" + +rules: {} + +service: + ## Annotations to be added to the Service + ## + annotations: {} + + clusterIP: "" + externalIPs: [] + loadBalancerIP: "" + loadBalancerSourceRanges: [] + + ## Port to expose on each node + ## Only used if service.type is 'NodePort' + ## + nodePort: 30900 + + ## Service type + ## + type: NodePort + +## The ServiceMonitor TPRs to be covered by the Prometheus instances. 
+## +serviceMonitorSelector: {} + +## Prometheus StorageSpec for persistent data +## Ref: https://github.com/coreos/prometheus-operator/blob/master/Documentation/prometheus.md#storagespec +## +storageSpec: {} +# class: default +# selector: {} +# resources: +# requests: +# storage: 16Gi From 29a04c4e145856c65efcc287e392ae698c547530 Mon Sep 17 00:00:00 2001 From: Michael Goodness Date: Thu, 2 Mar 2017 10:20:52 -0600 Subject: [PATCH 16/41] Operator v0.6.0; improved cleanup --- prometheus-operator/Chart.yaml | 2 +- .../templates/cleanup-job.yaml | 41 +++++++++++++++++++ .../delete-alertmanager-tpr-job.yaml | 25 ----------- .../templates/delete-prometheus-tpr-job.yaml | 25 ----------- .../delete-servicemonitor-tpr-job.yaml | 25 ----------- .../templates/get-alertmanager-tpr-job.yaml | 25 ----------- .../templates/get-servicemonitor-tpr-job.yaml | 25 ----------- ...metheus-tpr-job.yaml => get-tprs-job.yaml} | 12 ++++-- prometheus-operator/values.yaml | 20 ++++----- 9 files changed, 61 insertions(+), 139 deletions(-) create mode 100644 prometheus-operator/templates/cleanup-job.yaml delete mode 100644 prometheus-operator/templates/delete-alertmanager-tpr-job.yaml delete mode 100644 prometheus-operator/templates/delete-prometheus-tpr-job.yaml delete mode 100644 prometheus-operator/templates/delete-servicemonitor-tpr-job.yaml delete mode 100644 prometheus-operator/templates/get-alertmanager-tpr-job.yaml delete mode 100644 prometheus-operator/templates/get-servicemonitor-tpr-job.yaml rename prometheus-operator/templates/{get-prometheus-tpr-job.yaml => get-tprs-job.yaml} (64%) diff --git a/prometheus-operator/Chart.yaml b/prometheus-operator/Chart.yaml index 8a0f09af..8302ced7 100644 --- a/prometheus-operator/Chart.yaml +++ b/prometheus-operator/Chart.yaml @@ -7,4 +7,4 @@ maintainers: name: prometheus-operator sources: - https://github.com/coreos/prometheus-operator -version: 0.4.0-1 +version: 0.6.0-1 diff --git a/prometheus-operator/templates/cleanup-job.yaml 
b/prometheus-operator/templates/cleanup-job.yaml new file mode 100644 index 00000000..496dcd74 --- /dev/null +++ b/prometheus-operator/templates/cleanup-job.yaml @@ -0,0 +1,41 @@ +apiVersion: batch/v1 +kind: Job +metadata: + annotations: + helm.sh/hook: post-delete + labels: + app: {{ template "name" . }} + chart: {{ .Chart.Name }}-{{ .Chart.Version }} + heritage: {{ .Release.Service }} + release: {{ .Release.Name }} + name: {{ template "fullname" . }}-cleanup +spec: + template: + metadata: + labels: + app: {{ template "name" . }} + release: {{ .Release.Name }} + name: {{ template "fullname" . }}-cleanup + spec: + containers: + - name: delete-services + image: "{{ .Values.global.hyperkube.repository }}:{{ .Values.global.hyperkube.tag }}" + imagePullPolicy: "{{ .Values.global.hyperkube.pullPolicy }}" + command: + - /bin/sh + - -c + - for n in $(kubectl get namespaces -o jsonpath={..metadata.name}); do \ + kubectl delete --ignore-not-found --namespace $n services \ + prometheus-operated alertmanager-operated; done + - name: delete-tprs + image: "{{ .Values.global.hyperkube.repository }}:{{ .Values.global.hyperkube.tag }}" + imagePullPolicy: "{{ .Values.global.hyperkube.pullPolicy }}" + command: + - ./kubectl + - delete + - --ignore-not-found + - thirdpartyresource + - alertmanager.monitoring.coreos.com + - prometheus.monitoring.coreos.com + - service-monitor.monitoring.coreos.com + restartPolicy: OnFailure diff --git a/prometheus-operator/templates/delete-alertmanager-tpr-job.yaml b/prometheus-operator/templates/delete-alertmanager-tpr-job.yaml deleted file mode 100644 index f71ca5be..00000000 --- a/prometheus-operator/templates/delete-alertmanager-tpr-job.yaml +++ /dev/null @@ -1,25 +0,0 @@ -apiVersion: batch/v1 -kind: Job -metadata: - annotations: - helm.sh/hook: post-delete - labels: - app: {{ template "name" . }} - chart: {{ .Chart.Name }}-{{ .Chart.Version }} - heritage: {{ .Release.Service }} - release: {{ .Release.Name }} - name: {{ template "fullname" . 
}}-delete-alertmanager-tpr -spec: - template: - metadata: - labels: - app: {{ template "name" . }} - release: {{ .Release.Name }} - name: {{ template "fullname" . }}-delete-alertmanager-tpr - spec: - containers: - - name: hyperkube - image: "{{ .Values.global.hyperkube.repository }}:{{ .Values.global.hyperkube.tag }}" - imagePullPolicy: "{{ .Values.global.hyperkube.pullPolicy }}" - command: ["./kubectl","-n","{{ .Release.Namespace }}","delete","thirdpartyresource/alertmanager.monitoring.coreos.com"] - restartPolicy: Never diff --git a/prometheus-operator/templates/delete-prometheus-tpr-job.yaml b/prometheus-operator/templates/delete-prometheus-tpr-job.yaml deleted file mode 100644 index 1021901b..00000000 --- a/prometheus-operator/templates/delete-prometheus-tpr-job.yaml +++ /dev/null @@ -1,25 +0,0 @@ -apiVersion: batch/v1 -kind: Job -metadata: - annotations: - helm.sh/hook: post-delete - labels: - app: {{ template "name" . }} - chart: {{ .Chart.Name }}-{{ .Chart.Version }} - heritage: {{ .Release.Service }} - release: {{ .Release.Name }} - name: {{ template "fullname" . }}-delete-prometheus-tpr -spec: - template: - metadata: - labels: - app: {{ template "name" . }} - release: {{ .Release.Name }} - name: {{ template "fullname" . 
}}-delete-prometheus-tpr - spec: - containers: - - name: hyperkube - image: "{{ .Values.global.hyperkube.repository }}:{{ .Values.global.hyperkube.tag }}" - imagePullPolicy: "{{ .Values.global.hyperkube.pullPolicy }}" - command: ["./kubectl","-n","{{ .Release.Namespace }}","delete","thirdpartyresource/prometheus.monitoring.coreos.com"] - restartPolicy: Never diff --git a/prometheus-operator/templates/delete-servicemonitor-tpr-job.yaml b/prometheus-operator/templates/delete-servicemonitor-tpr-job.yaml deleted file mode 100644 index 4f5ead25..00000000 --- a/prometheus-operator/templates/delete-servicemonitor-tpr-job.yaml +++ /dev/null @@ -1,25 +0,0 @@ -apiVersion: batch/v1 -kind: Job -metadata: - annotations: - helm.sh/hook: post-delete - labels: - app: {{ template "name" . }} - chart: {{ .Chart.Name }}-{{ .Chart.Version }} - heritage: {{ .Release.Service }} - release: {{ .Release.Name }} - name: {{ template "fullname" . }}-delete-servicemonitor-tpr -spec: - template: - metadata: - labels: - app: {{ template "name" . }} - release: {{ .Release.Name }} - name: {{ template "fullname" . }}-delete-servicemonitor-tpr - spec: - containers: - - name: hyperkube - image: "{{ .Values.global.hyperkube.repository }}:{{ .Values.global.hyperkube.tag }}" - imagePullPolicy: "{{ .Values.global.hyperkube.pullPolicy }}" - command: ["./kubectl","-n","{{ .Release.Namespace }}","delete","thirdpartyresource/service-monitor.monitoring.coreos.com"] - restartPolicy: Never diff --git a/prometheus-operator/templates/get-alertmanager-tpr-job.yaml b/prometheus-operator/templates/get-alertmanager-tpr-job.yaml deleted file mode 100644 index b803d31b..00000000 --- a/prometheus-operator/templates/get-alertmanager-tpr-job.yaml +++ /dev/null @@ -1,25 +0,0 @@ -apiVersion: batch/v1 -kind: Job -metadata: - annotations: - helm.sh/hook: post-install - labels: - app: {{ template "name" . 
}} - chart: {{ .Chart.Name }}-{{ .Chart.Version }} - heritage: {{ .Release.Service }} - release: {{ .Release.Name }} - name: {{ template "fullname" . }}-get-alertmanager-tpr -spec: - template: - metadata: - labels: - app: {{ template "name" . }} - release: {{ .Release.Name }} - name: {{ template "fullname" . }}-get-alertmanager-tpr - spec: - containers: - - name: hyperkube - image: "{{ .Values.global.hyperkube.repository }}:{{ .Values.global.hyperkube.tag }}" - imagePullPolicy: "{{ .Values.global.hyperkube.pullPolicy }}" - command: ["./kubectl","-n","{{ .Release.Namespace }}","get","alertmanager"] - restartPolicy: OnFailure diff --git a/prometheus-operator/templates/get-servicemonitor-tpr-job.yaml b/prometheus-operator/templates/get-servicemonitor-tpr-job.yaml deleted file mode 100644 index e001efbb..00000000 --- a/prometheus-operator/templates/get-servicemonitor-tpr-job.yaml +++ /dev/null @@ -1,25 +0,0 @@ -apiVersion: batch/v1 -kind: Job -metadata: - annotations: - helm.sh/hook: post-install - labels: - app: {{ template "name" . }} - chart: {{ .Chart.Name }}-{{ .Chart.Version }} - heritage: {{ .Release.Service }} - release: {{ .Release.Name }} - name: {{ template "fullname" . }}-get-servicemonitor-tpr -spec: - template: - metadata: - labels: - app: {{ template "name" . }} - release: {{ .Release.Name }} - name: {{ template "fullname" . 
}}-get-servicemonitor-tpr - spec: - containers: - - name: hyperkube - image: "{{ .Values.global.hyperkube.repository }}:{{ .Values.global.hyperkube.tag }}" - imagePullPolicy: "{{ .Values.global.hyperkube.pullPolicy }}" - command: ["./kubectl","-n","{{ .Release.Namespace }}","get","servicemonitor"] - restartPolicy: OnFailure diff --git a/prometheus-operator/templates/get-prometheus-tpr-job.yaml b/prometheus-operator/templates/get-tprs-job.yaml similarity index 64% rename from prometheus-operator/templates/get-prometheus-tpr-job.yaml rename to prometheus-operator/templates/get-tprs-job.yaml index 4f79aff6..0efdae92 100644 --- a/prometheus-operator/templates/get-prometheus-tpr-job.yaml +++ b/prometheus-operator/templates/get-tprs-job.yaml @@ -8,18 +8,24 @@ metadata: chart: {{ .Chart.Name }}-{{ .Chart.Version }} heritage: {{ .Release.Service }} release: {{ .Release.Name }} - name: {{ template "fullname" . }}-get-prometheus-tpr + name: {{ template "fullname" . }}-get-tprs spec: template: metadata: labels: app: {{ template "name" . }} release: {{ .Release.Name }} - name: {{ template "fullname" . }}-get-prometheus-tpr + name: {{ template "fullname" . 
}}-get-tprs spec: containers: - name: hyperkube image: "{{ .Values.global.hyperkube.repository }}:{{ .Values.global.hyperkube.tag }}" imagePullPolicy: "{{ .Values.global.hyperkube.pullPolicy }}" - command: ["./kubectl","-n","{{ .Release.Namespace }}","get","prometheus"] + command: + - ./kubectl + - get + - thirdpartyresource + - alertmanager.monitoring.coreos.com + - prometheus.monitoring.coreos.com + - service-monitor.monitoring.coreos.com restartPolicy: OnFailure diff --git a/prometheus-operator/values.yaml b/prometheus-operator/values.yaml index 8b565a75..d8a9e1bb 100644 --- a/prometheus-operator/values.yaml +++ b/prometheus-operator/values.yaml @@ -1,16 +1,16 @@ global: - ## Hyperkube image to use when getting/deleting ThirdPartyResources + ## Hyperkube image to use when getting ThirdPartyResources & cleaning up ## hyperkube: repository: quay.io/coreos/hyperkube - tag: v1.5.2_coreos.1 + tag: v1.5.3_coreos.0 pullPolicy: IfNotPresent ## Prometheus-operator image ## image: repository: quay.io/coreos/prometheus-operator - tag: v0.4.0 + tag: v0.6.0 pullPolicy: IfNotPresent ## If true, collect & send anonymous usage statistics @@ -21,10 +21,10 @@ sendAnalytics: true ## Prometheus-operator resource limits & requests ## Ref: https://kubernetes.io/docs/user-guide/compute-resources/ ## -resources: - limits: - cpu: 200m - memory: 300Mi - requests: - cpu: 100m - memory: 50Mi +resources: {} + # limits: + # cpu: 200m + # memory: 300Mi + # requests: + # cpu: 100m + # memory: 50Mi From ff7ba216b472f085ba7bc4d95ab34b50b4442ce3 Mon Sep 17 00:00:00 2001 From: Michael Goodness Date: Fri, 3 Mar 2017 11:29:17 -0600 Subject: [PATCH 17/41] Fixed operator cleanup job --- prometheus-operator/templates/cleanup-job.yaml | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/prometheus-operator/templates/cleanup-job.yaml b/prometheus-operator/templates/cleanup-job.yaml index 496dcd74..53ebee6b 100644 --- a/prometheus-operator/templates/cleanup-job.yaml +++ 
b/prometheus-operator/templates/cleanup-job.yaml @@ -22,16 +22,14 @@ spec: image: "{{ .Values.global.hyperkube.repository }}:{{ .Values.global.hyperkube.tag }}" imagePullPolicy: "{{ .Values.global.hyperkube.pullPolicy }}" command: - - /bin/sh + - /bin/bash - -c - - for n in $(kubectl get namespaces -o jsonpath={..metadata.name}); do \ - kubectl delete --ignore-not-found --namespace $n services \ - prometheus-operated alertmanager-operated; done + - for n in $(/kubectl get namespaces -o jsonpath={..metadata.name}); do /kubectl delete --ignore-not-found --namespace $n services prometheus-operated alertmanager-operated; done - name: delete-tprs image: "{{ .Values.global.hyperkube.repository }}:{{ .Values.global.hyperkube.tag }}" imagePullPolicy: "{{ .Values.global.hyperkube.pullPolicy }}" command: - - ./kubectl + - /kubectl - delete - --ignore-not-found - thirdpartyresource From 9c8abafa2e2e9bb68cea7ef2d129ebabee2789ab Mon Sep 17 00:00:00 2001 From: Michael Goodness Date: Fri, 3 Mar 2017 11:50:10 -0600 Subject: [PATCH 18/41] Allow nodeSelector for operator --- prometheus-operator/templates/deployment.yaml | 4 ++++ prometheus-operator/values.yaml | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/prometheus-operator/templates/deployment.yaml b/prometheus-operator/templates/deployment.yaml index 29df063a..75d101e5 100644 --- a/prometheus-operator/templates/deployment.yaml +++ b/prometheus-operator/templates/deployment.yaml @@ -25,3 +25,7 @@ spec: - --analytics={{ .Values.sendAnalytics }} resources: {{ toYaml .Values.resources | indent 12 }} + {{- if .Values.nodeSelector }} + nodeSelector: +{{ toYaml .Values.nodeSelector | indent 8 }} + {{- end }} diff --git a/prometheus-operator/values.yaml b/prometheus-operator/values.yaml index d8a9e1bb..8f6101cf 100644 --- a/prometheus-operator/values.yaml +++ b/prometheus-operator/values.yaml @@ -13,6 +13,10 @@ image: tag: v0.6.0 pullPolicy: IfNotPresent +## Node labels for prometheus-operator pod assignment +## +nodeSelector: 
{} + ## If true, collect & send anonymous usage statistics ## Ref: https://github.com/coreos/prometheus-operator#installation ## From cd51faaff92133f4576397c25ff89c6acfe716c7 Mon Sep 17 00:00:00 2001 From: Michael Goodness Date: Fri, 3 Mar 2017 12:02:19 -0600 Subject: [PATCH 19/41] No default resources; ClusterIP service --- prometheus/Chart.yaml | 2 +- prometheus/values.yaml | 18 +++++------------- 2 files changed, 6 insertions(+), 14 deletions(-) diff --git a/prometheus/Chart.yaml b/prometheus/Chart.yaml index c471741d..0200ebb5 100644 --- a/prometheus/Chart.yaml +++ b/prometheus/Chart.yaml @@ -7,4 +7,4 @@ maintainers: name: prometheus sources: - https://github.com/coreos/prometheus-operator -version: 0.4.0-1 +version: 0.6.0-1 diff --git a/prometheus/values.yaml b/prometheus/values.yaml index c76e39fd..4bc3c0aa 100644 --- a/prometheus/values.yaml +++ b/prometheus/values.yaml @@ -1,11 +1,3 @@ -global: - ## Hyperkube image to use when getting & deleting ThirdPartyResources - ## - hyperkube: - repository: quay.io/coreos/hyperkube - tag: v1.5.2_coreos.1 - pullPolicy: IfNotPresent - ## AlertManagers to which alerts will be sent ## Ref: https://github.com/coreos/prometheus-operator/blob/master/Documentation/prometheus.md#alertmanagerendpoints ## @@ -68,9 +60,9 @@ retention: 24h ## Resource limits & requests ## Ref: https://kubernetes.io/docs/user-guide/compute-resources/ ## -resources: - requests: - memory: 400Mi +resources: {} + # requests: + # memory: 400Mi ## Prefix used to register routes, overriding externalUrl route. ## Useful for proxies that rewrite URLs. @@ -96,9 +88,9 @@ service: ## Service type ## - type: NodePort + type: ClusterIP -## The ServiceMonitor TPRs to be covered by the Prometheus instances. +## The ServiceMonitor TPRs to be covered by the Prometheus instance. 
## serviceMonitorSelector: {} From 2041bf0a193ea509346519856ce20c6d4e78cd56 Mon Sep 17 00:00:00 2001 From: Michael Goodness Date: Fri, 3 Mar 2017 12:40:07 -0600 Subject: [PATCH 20/41] Only create configmap if there's config --- prometheus/templates/prometheus-config.yaml | 2 +- prometheus/values.yaml | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/prometheus/templates/prometheus-config.yaml b/prometheus/templates/prometheus-config.yaml index 750a0cec..d95cc94b 100644 --- a/prometheus/templates/prometheus-config.yaml +++ b/prometheus/templates/prometheus-config.yaml @@ -1,4 +1,4 @@ -{{- if not .Values.serviceMonitorSelector }} +{{- if and ( .Values.config ) ( not .Values.serviceMonitorSelector ) }} apiVersion: v1 kind: ConfigMap metadata: diff --git a/prometheus/values.yaml b/prometheus/values.yaml index 4bc3c0aa..1968a277 100644 --- a/prometheus/values.yaml +++ b/prometheus/values.yaml @@ -28,7 +28,7 @@ ingress: ## annotations: {} # kubernetes.io/ingress.class: nginx - # kubernetes.io/tls-acme: 'true' + # kubernetes.io/tls-acme: "true" fqdn: "" @@ -99,7 +99,7 @@ serviceMonitorSelector: {} ## storageSpec: {} # class: default -# selector: {} # resources: # requests: # storage: 16Gi +# selector: {} From 5a689f7787f9ce362faebcecd072b8f0be476ad9 Mon Sep 17 00:00:00 2001 From: Michael Goodness Date: Fri, 3 Mar 2017 19:49:16 -0600 Subject: [PATCH 21/41] Created alertmanager chart --- alertmanager/.helmignore | 21 +++++ alertmanager/Chart.yaml | 10 +++ alertmanager/templates/NOTES.txt | 1 + alertmanager/templates/_helpers.tpl | 16 ++++ .../templates/alertmanager-config.yaml | 28 +++++++ .../templates/alertmanager-ingress.yaml | 29 +++++++ .../templates/alertmanager-service.yaml | 39 +++++++++ alertmanager/templates/alertmanager.yaml | 28 +++++++ alertmanager/values.yaml | 81 +++++++++++++++++++ prometheus/templates/prometheus-service.yaml | 2 +- 10 files changed, 254 insertions(+), 1 deletion(-) create mode 100644 alertmanager/.helmignore create mode 
100644 alertmanager/Chart.yaml create mode 100644 alertmanager/templates/NOTES.txt create mode 100644 alertmanager/templates/_helpers.tpl create mode 100644 alertmanager/templates/alertmanager-config.yaml create mode 100644 alertmanager/templates/alertmanager-ingress.yaml create mode 100644 alertmanager/templates/alertmanager-service.yaml create mode 100644 alertmanager/templates/alertmanager.yaml create mode 100644 alertmanager/values.yaml diff --git a/alertmanager/.helmignore b/alertmanager/.helmignore new file mode 100644 index 00000000..f0c13194 --- /dev/null +++ b/alertmanager/.helmignore @@ -0,0 +1,21 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*~ +# Various IDEs +.project +.idea/ +*.tmproj diff --git a/alertmanager/Chart.yaml b/alertmanager/Chart.yaml new file mode 100644 index 00000000..695ff38a --- /dev/null +++ b/alertmanager/Chart.yaml @@ -0,0 +1,10 @@ +apiVersion: v1 +description: AlertManager instance created by the CoreOS Prometheus Operator +engine: gotpl +maintainers: + - name: Michael Goodness + email: mgoodness@gmail.com +name: alertmanager +sources: + - https://github.com/coreos/prometheus-operator +version: 0.6.0-1 diff --git a/alertmanager/templates/NOTES.txt b/alertmanager/templates/NOTES.txt new file mode 100644 index 00000000..01c5734c --- /dev/null +++ b/alertmanager/templates/NOTES.txt @@ -0,0 +1 @@ +A new AlertManager instance has been created. diff --git a/alertmanager/templates/_helpers.tpl b/alertmanager/templates/_helpers.tpl new file mode 100644 index 00000000..f0d83d2e --- /dev/null +++ b/alertmanager/templates/_helpers.tpl @@ -0,0 +1,16 @@ +{{/* vim: set filetype=mustache: */}} +{{/* +Expand the name of the chart. 
+*/}} +{{- define "name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +*/}} +{{- define "fullname" -}} +{{- $name := default .Chart.Name .Values.nameOverride -}} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} +{{- end -}} diff --git a/alertmanager/templates/alertmanager-config.yaml b/alertmanager/templates/alertmanager-config.yaml new file mode 100644 index 00000000..b218f670 --- /dev/null +++ b/alertmanager/templates/alertmanager-config.yaml @@ -0,0 +1,28 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + labels: + alertmanager: {{ .Release.Name }} + app: {{ template "name" . }} + chart: {{ .Chart.Name }}-{{ .Chart.Version }} + heritage: {{ .Release.Service }} + release: {{ .Release.Name }} + name: alertmanager-{{ .Release.Name }} +data: + alertmanager.yaml: |- + {{- if .Values.config }} +{{ toYaml .Values.config | indent 4 }} + {{- else }} + global: + resolve_timeout: 5m + receivers: + - name: 'webhook' + webhook_configs: + - url: 'http://alertmanagerwh:30500/' + route: + group_by: ['job'] + group_interval: 5m + group_wait: 30s + receiver: 'webhook' + repeat_interval: 12h + {{- end }} diff --git a/alertmanager/templates/alertmanager-ingress.yaml b/alertmanager/templates/alertmanager-ingress.yaml new file mode 100644 index 00000000..bf55768f --- /dev/null +++ b/alertmanager/templates/alertmanager-ingress.yaml @@ -0,0 +1,29 @@ +{{- if .Values.ingress.enabled }} +apiVersion: extensions/v1beta1 +kind: Ingress +metadata: +{{- if .Values.ingress.annotations }} + annotations: +{{ toYaml .Values.ingress.annotations | indent 4 }} +{{- end }} + labels: + alertmanager: {{ .Release.Name }} + app: {{ template "name" . 
}} + chart: {{ .Chart.Name }}-{{ .Chart.Version }} + heritage: {{ .Release.Service }} + release: {{ .Release.Name }} + name: {{ template "fullname" . }} +spec: + rules: + - host: "{{ .Values.ingress.fqdn }}" + http: + paths: + - path: "{{ .Values.routePrefix }}" + backend: + serviceName: {{ template "fullname" . }} + servicePort: 9093 +{{- if .Values.ingress.tls }} + tls: +{{ toYaml .Values.ingress.tls | indent 4 }} +{{- end }} +{{- end }} diff --git a/alertmanager/templates/alertmanager-service.yaml b/alertmanager/templates/alertmanager-service.yaml new file mode 100644 index 00000000..03c60aa4 --- /dev/null +++ b/alertmanager/templates/alertmanager-service.yaml @@ -0,0 +1,39 @@ +apiVersion: v1 +kind: Service +metadata: +{{- if .Values.service.annotations }} + annotations: +{{ toYaml .Values.service.annotations | indent 4 }} +{{- end }} + labels: + alertmanager: {{ .Release.Name }} + app: {{ template "name" . }} + chart: {{ .Chart.Name }}-{{ .Chart.Version }} + heritage: {{ .Release.Service }} + release: {{ .Release.Name }} + name: {{ template "fullname" . }} +spec: + clusterIP: "{{ .Values.service.clusterIP }}" +{{- if .Values.service.externalIPs }} + externalIPs: +{{ toYaml .Values.service.externalIPs | indent 4 }} +{{- end }} +{{- if .Values.service.loadBalancerIP }} + loadBalancerIP: "{{ .Values.service.loadBalancerIP }}" +{{- end }} +{{- if .Values.service.loadBalancerSourceRanges }} + loadBalancerSourceRanges: +{{ toYaml .Values.service.loadBalancerSourceRanges | indent 4 }} +{{- end }} + ports: + - name: http + {{- if eq .Values.service.type "NodePort" }} + nodePort: {{ .Values.service.nodePort }} + {{- end }} + port: 9093 + targetPort: 9093 + protocol: TCP + selector: + alertmanager: {{ .Release.Name }} + app: {{ template "name" . 
}} + type: "{{ .Values.service.type }}" diff --git a/alertmanager/templates/alertmanager.yaml b/alertmanager/templates/alertmanager.yaml new file mode 100644 index 00000000..37434c4a --- /dev/null +++ b/alertmanager/templates/alertmanager.yaml @@ -0,0 +1,28 @@ +apiVersion: monitoring.coreos.com/v1alpha1 +kind: Alertmanager +metadata: + labels: + alertmanager: {{ .Release.Name }} + app: {{ template "name" . }} + chart: {{ .Chart.Name }}-{{ .Chart.Version }} + heritage: {{ .Release.Service }} + release: {{ .Release.Name }} + name: {{ .Release.Name }} +spec: + baseImage: "{{ .Values.image.repository }}" +{{- if .Values.externalUrl }} + externalUrl: "{{ .Values.externalUrl }}" +{{- end }} +# {{- if .Values.nodeSelector }} +# nodeSelector: +# {{ toYaml .Values.nodeSelector | indent 4 }} +# {{- end }} + paused: {{ .Values.paused }} + replicas: {{ .Values.replicaCount }} +# resources: +# {{ toYaml .Values.resources | indent 4 }} +{{- if .Values.storageSpec }} + storage: +{{ toYaml .Values.storageSpec | indent 4 }} +{{- end }} + version: "{{ .Values.image.tag }}" diff --git a/alertmanager/values.yaml b/alertmanager/values.yaml new file mode 100644 index 00000000..04e678ad --- /dev/null +++ b/alertmanager/values.yaml @@ -0,0 +1,81 @@ +config: {} + +## External URL at which AlertManager will be reachable +## +externalUrl: "" + +## AlertManager container image +## +image: + repository: quay.io/prometheus/alertmanager + tag: v0.5.1 + +ingress: + ## If true, AlertManager Ingress will be created + ## + enabled: false + + ## Annotations for AlertManager Ingress + ## + annotations: {} + # kubernetes.io/ingress.class: nginx + # kubernetes.io/tls-acme: "true" + + fqdn: "" + + ## TLS configuration for AlertManager Ingress + ## Secret must be manually created in the namespace + ## + tls: [] + # - secretName: alertmanager-general-tls + # hosts: + # - alertmanager.example.com + +## Node labels for AlertManager pod assignment +## Ref: https://kubernetes.io/docs/user-guide/node-selection/ 
+## +nodeSelector: {} + +## If true, the Operator won't process any AlertManager configuration changes +## +paused: false + +## Number of AlertManager replicas desired +## +replicaCount: 1 + +## Resource limits & requests +## Ref: https://kubernetes.io/docs/user-guide/compute-resources/ +## +resources: {} + # requests: + # memory: 400Mi + +service: + ## Annotations to be added to the Service + ## + annotations: {} + + clusterIP: "" + externalIPs: [] + loadBalancerIP: "" + loadBalancerSourceRanges: [] + + ## Port to expose on each node + ## Only used if service.type is 'NodePort' + ## + nodePort: 30903 + + ## Service type + ## + type: ClusterIP + +## AlertManager StorageSpec for persistent data +## Ref: https://github.com/coreos/prometheus-operator/blob/master/Documentation/prometheus.md#storagespec +## +storageSpec: {} +# class: default +# resources: +# requests: +# storage: 2Gi +# selector: {} diff --git a/prometheus/templates/prometheus-service.yaml b/prometheus/templates/prometheus-service.yaml index 2b7b6527..9d14791e 100644 --- a/prometheus/templates/prometheus-service.yaml +++ b/prometheus/templates/prometheus-service.yaml @@ -34,6 +34,6 @@ spec: targetPort: 9090 protocol: TCP selector: - app: prometheus + app: {{ template "name" . 
}} prometheus: {{ .Release.Name }} type: "{{ .Values.service.type }}" From 4532650cbeab3098db5830a2fa1fc7c920ecc119 Mon Sep 17 00:00:00 2001 From: Michael Goodness Date: Sat, 4 Mar 2017 13:43:28 -0600 Subject: [PATCH 22/41] Prometheus chart creates ServiceMonitors --- prometheus/templates/prometheus.yaml | 6 +- prometheus/templates/servicemonitors.yaml | 33 ++++++++++ prometheus/values.yaml | 78 ++++++++++++++++++++++- 3 files changed, 113 insertions(+), 4 deletions(-) create mode 100644 prometheus/templates/servicemonitors.yaml diff --git a/prometheus/templates/prometheus.yaml b/prometheus/templates/prometheus.yaml index 1526b317..9a27230e 100644 --- a/prometheus/templates/prometheus.yaml +++ b/prometheus/templates/prometheus.yaml @@ -30,9 +30,11 @@ spec: {{- if .Values.routePrefix }} routePrefix: "{{ .Values.routePrefix }}" {{- end }} -{{- if .Values.serviceMonitorSelector }} +{{- if .Values.serviceMonitors }} serviceMonitorSelector: -{{ toYaml .Values.serviceMonitorSelector | indent 4 }} + matchLabels: + app: {{ template "name" . }} + release: {{ .Release.Name }} {{- end }} {{- if .Values.storageSpec }} storage: diff --git a/prometheus/templates/servicemonitors.yaml b/prometheus/templates/servicemonitors.yaml new file mode 100644 index 00000000..f6deeedf --- /dev/null +++ b/prometheus/templates/servicemonitors.yaml @@ -0,0 +1,33 @@ +{{- $app := include "name" . 
-}} +{{- $releaseName := .Release.Name -}} +{{- $chartName := .Chart.Name -}} +{{- $chartVersion := .Chart.Version -}} +{{- $releaseService := .Release.Service -}} +{{- if .Values.serviceMonitors }} +apiVersion: v1 +kind: List +items: +{{- range .Values.serviceMonitors }} + - apiVersion: monitoring.coreos.com/v1alpha1 + kind: ServiceMonitor + metadata: + labels: + app: {{ $app }} + chart: {{ $chartName }}-{{ $chartVersion }} + heritage: {{ $releaseService }} + release: {{ $releaseName }} + name: {{ .name }} + spec: + endpoints: +{{ toYaml .endpoints | indent 8 }} + {{- if .jobLabel }} + jobLabel: {{ .jobLabel }} + {{- end }} + {{- if .namespaceSelector }} + namespaceSelector: +{{ toYaml .namespaceSelector | indent 8 }} + {{- end }} + selector: +{{ toYaml .selector | indent 8 }} +{{- end }} +{{- end }} diff --git a/prometheus/values.yaml b/prometheus/values.yaml index 1968a277..94b17901 100644 --- a/prometheus/values.yaml +++ b/prometheus/values.yaml @@ -90,9 +90,83 @@ service: ## type: ClusterIP -## The ServiceMonitor TPRs to be covered by the Prometheus instance. +## ServiceMonitor TPRs to create & be scraped by the Prometheus instance. ## -serviceMonitorSelector: {} +serviceMonitors: [] + ## Name of the ServiceMonitor to create + ## + # - name: "" + + ## Service label for use in assembling a job name of the form