Skip to content
This repository has been archived by the owner on Apr 2, 2024. It is now read-only.

Commit

Permalink
Merge branch 'master' into nhudson/add_struct_tags
Browse files Browse the repository at this point in the history
  • Loading branch information
nhudson authored Dec 20, 2022
2 parents f9afe09 + 19d8e3e commit a26f1f2
Show file tree
Hide file tree
Showing 5 changed files with 111 additions and 32 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ We use the following categories for changes:
- In order to reduce the overall load on the system, some internal database
metrics won't be collected as often as they used to. None of the affected
metrics is expected to change faster than its new collection interval [#1793]
- Aggregate metrics at the global level to avoid legend pollution in dashboards [#1800]

### Fixed

Expand Down
15 changes: 15 additions & 0 deletions docs/mixin/alerts/alerts.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -342,3 +342,18 @@ groups:
summary: High uncompressed data.
description: "High uncompressed data in Promscale, on average, {{ $value }} uncompressed chunks per metric."
runbook_url: https://github.com/timescale/promscale/blob/master/docs/runbooks/PromscaleCompressionLow.md
# Warning-level alert: fires when the combined table size and index size of the
# currently open chunks is larger than the PostgreSQL shared_buffers size
# (ratio > 1) and that condition has held for 10 minutes.
# The description annotation renders the expression value through
# humanizePercentage and the runbook_url annotation links to the matching runbook.
- alert: PromscalePostgreSQLSharedBuffersLow
expr: |
(
((promscale_sql_database_open_chunks_total_table_size + promscale_sql_database_open_chunks_total_index_size)
/
promscale_sql_database_shared_buffers_size)
> 1 )
for: 10m
labels:
severity: warning
annotations:
summary: Promscale database performance will be affected.
description: "Currently open chunks are {{ $value | humanizePercentage }} of PostgreSQL shared_buffers. This will impact database performance."
runbook_url: https://github.com/timescale/promscale/blob/master/docs/runbooks/PromscalePostgreSQLSharedBuffersLow.md

64 changes: 32 additions & 32 deletions docs/mixin/dashboards/promscale.json
Original file line number Diff line number Diff line change
Expand Up @@ -657,7 +657,7 @@
"uid": "${datasource}"
},
"exemplar": true,
"expr": "histogram_quantile(0.5, rate(promscale_ingest_duration_seconds_bucket{namespace=~\"$namespace\"}[$__rate_interval]))",
"expr": "histogram_quantile(0.5, sum(rate(promscale_ingest_duration_seconds_bucket{namespace=~\"$namespace\"}[$__rate_interval])) by(le))",
"interval": "2m",
"legendFormat": "p50",
"range": true,
Expand All @@ -669,7 +669,7 @@
"uid": "${datasource}"
},
"exemplar": true,
"expr": "histogram_quantile(0.90, rate(promscale_ingest_duration_seconds_bucket{namespace=~\"$namespace\"}[$__rate_interval]))",
"expr": "histogram_quantile(0.9, sum(rate(promscale_ingest_duration_seconds_bucket{namespace=~\"$namespace\"}[$__rate_interval])) by(le))",
"hide": false,
"interval": "2m",
"legendFormat": "p90",
Expand All @@ -682,7 +682,7 @@
"uid": "${datasource}"
},
"exemplar": true,
"expr": "histogram_quantile(0.95, rate(promscale_ingest_duration_seconds_bucket{namespace=~\"$namespace\"}[$__rate_interval]))",
"expr": "histogram_quantile(0.95, sum(rate(promscale_ingest_duration_seconds_bucket{namespace=~\"$namespace\"}[$__rate_interval])) by(le))",
"hide": false,
"interval": "2m",
"legendFormat": "p95",
Expand Down Expand Up @@ -794,7 +794,7 @@
},
"editorMode": "code",
"exemplar": true,
"expr": "rate(grpc_server_msg_received_total{grpc_method=~\"(WriteSpan|WriteSpanStream|Export)\",namespace=~\"$namespace\"}[$__rate_interval])",
"expr": "sum(rate(grpc_server_msg_received_total{grpc_method=~\"(WriteSpan|WriteSpanStream|Export)\",namespace=~\"$namespace\"}[$__rate_interval])) by(grpc_service)",
"interval": "2m",
"legendFormat": "{{ grpc_service }}",
"range": true,
Expand Down Expand Up @@ -890,7 +890,7 @@
"uid": "${datasource}"
},
"exemplar": true,
"expr": "rate(grpc_server_handled_total{grpc_service=~\"opentelemetry.proto.collector.trace.v1.TraceService\",grpc_code=~\"Aborted|Unavailable|Internal|Unknown|Unimplemented|DataLoss\",grpc_method=~\"Export\",namespace=~\"$namespace\"}[$__rate_interval])",
"expr": "sum(rate(grpc_server_handled_total{grpc_service=~\"opentelemetry.proto.collector.trace.v1.TraceService\",grpc_code=~\"Aborted|Unavailable|Internal|Unknown|Unimplemented|DataLoss\",grpc_method=~\"Export\",namespace=~\"$namespace\"}[$__rate_interval])) by(grpc_code)",
"interval": "2m",
"legendFormat": "{{ grpc_code }}",
"range": true,
Expand Down Expand Up @@ -986,7 +986,7 @@
"uid": "${datasource}"
},
"exemplar": true,
"expr": "histogram_quantile(0.5, rate(grpc_server_handling_seconds_bucket{grpc_service=\"opentelemetry.proto.collector.trace.v1.TraceService\",grpc_method=~\"Export\",namespace=~\"$namespace\"}[$__rate_interval]))",
"expr": "histogram_quantile(0.5, sum(rate(grpc_server_handling_seconds_bucket{grpc_service=\"opentelemetry.proto.collector.trace.v1.TraceService\",grpc_method=~\"Export\",namespace=~\"$namespace\"}[$__rate_interval])) by(le))",
"interval": "2m",
"legendFormat": "p50 {{ type }}",
"range": true,
Expand All @@ -998,7 +998,7 @@
"uid": "${datasource}"
},
"exemplar": true,
"expr": "histogram_quantile(0.9, rate(grpc_server_handling_seconds_bucket{grpc_service=\"opentelemetry.proto.collector.trace.v1.TraceService\",grpc_method=~\"Export\",namespace=~\"$namespace\"}[$__rate_interval]))",
"expr": "histogram_quantile(0.9, sum(rate(grpc_server_handling_seconds_bucket{grpc_service=\"opentelemetry.proto.collector.trace.v1.TraceService\",grpc_method=~\"Export\",namespace=~\"$namespace\"}[$__rate_interval])) by(le))",
"hide": false,
"interval": "2m",
"legendFormat": "p90 {{ type }}",
Expand All @@ -1011,7 +1011,7 @@
"uid": "${datasource}"
},
"exemplar": true,
"expr": "histogram_quantile(0.95, rate(grpc_server_handling_seconds_bucket{grpc_service=\"opentelemetry.proto.collector.trace.v1.TraceService\",grpc_method=~\"Export\",namespace=~\"$namespace\"}[$__rate_interval]))",
"expr": "histogram_quantile(0.95, sum(rate(grpc_server_handling_seconds_bucket{grpc_service=\"opentelemetry.proto.collector.trace.v1.TraceService\",grpc_method=~\"Export\",namespace=~\"$namespace\"}[$__rate_interval])) by(le))",
"hide": false,
"interval": "2m",
"legendFormat": "p95 {{ type }}",
Expand Down Expand Up @@ -1640,7 +1640,7 @@
},
"editorMode": "code",
"exemplar": true,
"expr": "max by (job, instance) (promscale_sql_database_chunks_metrics_expired_count{namespace=~\"$namespace\"})",
"expr": "max(promscale_sql_database_chunks_metrics_expired_count{namespace=~\"$namespace\"})",
"interval": "",
"legendFormat": "metrics-expired",
"range": true,
Expand All @@ -1653,7 +1653,7 @@
},
"editorMode": "code",
"exemplar": true,
"expr": "max by (job, instance) (promscale_sql_database_chunks_metrics_uncompressed_count{namespace=~\"$namespace\"})",
"expr": "max(promscale_sql_database_chunks_metrics_uncompressed_count{namespace=~\"$namespace\"})",
"hide": false,
"interval": "",
"legendFormat": "metrics-uncompressed",
Expand All @@ -1667,7 +1667,7 @@
},
"editorMode": "code",
"exemplar": true,
"expr": "max by (job, instance) (promscale_sql_database_chunks_traces_expired_count{namespace=~\"$namespace\"})",
"expr": "max(promscale_sql_database_chunks_traces_expired_count{namespace=~\"$namespace\"})",
"hide": false,
"interval": "",
"legendFormat": "traces-expired",
Expand All @@ -1681,7 +1681,7 @@
},
"editorMode": "code",
"exemplar": true,
"expr": "max by (job, instance) (promscale_sql_database_chunks_traces_uncompressed_count{namespace=~\"$namespace\"})",
"expr": "max(promscale_sql_database_chunks_traces_uncompressed_count{namespace=~\"$namespace\"})",
"hide": false,
"interval": "",
"legendFormat": "traces-uncompressed",
Expand All @@ -1695,7 +1695,7 @@
},
"editorMode": "code",
"exemplar": true,
"expr": "max by (job, instance) (promscale_sql_database_chunks_metrics_delayed_compression_count{namespace=~\"$namespace\"})",
"expr": "max(promscale_sql_database_chunks_metrics_delayed_compression_count{namespace=~\"$namespace\"})",
"hide": false,
"interval": "",
"legendFormat": "metrics-compression-delayed",
Expand Down Expand Up @@ -1949,7 +1949,7 @@
"uid": "${datasource}"
},
"exemplar": true,
"expr": "histogram_quantile(0.5, rate(promscale_database_requests_duration_seconds_bucket{method=~\"query.*\",namespace=~\"$namespace\"}[$__rate_interval]))",
"expr": "histogram_quantile(0.5, sum(rate(promscale_database_requests_duration_seconds_bucket{method=~\"query.*\",namespace=~\"$namespace\"}[$__rate_interval])) by(le, method))",
"interval": "2m",
"legendFormat": "p50 - {{ method }}",
"range": true,
Expand All @@ -1961,7 +1961,7 @@
"uid": "${datasource}"
},
"exemplar": true,
"expr": "histogram_quantile(0.9, rate(promscale_database_requests_duration_seconds_bucket{method=~\"query.*\",namespace=~\"$namespace\"}[$__rate_interval]))",
"expr": "histogram_quantile(0.9, sum(rate(promscale_database_requests_duration_seconds_bucket{method=~\"query.*\",namespace=~\"$namespace\"}[$__rate_interval])) by(le, method))",
"hide": false,
"interval": "2m",
"legendFormat": "p90 - {{ method }}",
Expand All @@ -1974,7 +1974,7 @@
"uid": "${datasource}"
},
"exemplar": true,
"expr": "histogram_quantile(0.95, rate(promscale_database_requests_duration_seconds_bucket{method=~\"query.*\",namespace=~\"$namespace\"}[$__rate_interval]))",
"expr": "histogram_quantile(0.95, sum(rate(promscale_database_requests_duration_seconds_bucket{method=~\"query.*\",namespace=~\"$namespace\"}[$__rate_interval])) by(le, method))",
"hide": false,
"interval": "2m",
"legendFormat": "p95 - {{ method }}",
Expand Down Expand Up @@ -2070,7 +2070,7 @@
"uid": "${datasource}"
},
"exemplar": true,
"expr": "histogram_quantile(0.5, rate(promscale_database_requests_duration_seconds_bucket{method!~\"query.*\",namespace=~\"$namespace\"}[$__rate_interval]))",
"expr": "histogram_quantile(0.5, sum(rate(promscale_database_requests_duration_seconds_bucket{method!~\"query.*\",namespace=~\"$namespace\"}[$__rate_interval])) by(le, method))",
"interval": "",
"legendFormat": "p50 - {{ method }}",
"refId": "A"
Expand All @@ -2081,7 +2081,7 @@
"uid": "${datasource}"
},
"exemplar": true,
"expr": "histogram_quantile(0.9, rate(promscale_database_requests_duration_seconds_bucket{method!~\"query.*\",namespace=~\"$namespace\"}[$__rate_interval]))",
"expr": "histogram_quantile(0.9, sum(rate(promscale_database_requests_duration_seconds_bucket{method!~\"query.*\",namespace=~\"$namespace\"}[$__rate_interval])) by(le, method))",
"hide": false,
"interval": "",
"legendFormat": "p90 - {{ method }}",
Expand All @@ -2093,7 +2093,7 @@
"uid": "${datasource}"
},
"exemplar": true,
"expr": "histogram_quantile(0.95, rate(promscale_database_requests_duration_seconds_bucket{method!~\"query.*\",namespace=~\"$namespace\"}[$__rate_interval]))",
"expr": "histogram_quantile(0.95, sum(rate(promscale_database_requests_duration_seconds_bucket{method!~\"query.*\",namespace=~\"$namespace\"}[$__rate_interval])) by(le, method))",
"hide": false,
"interval": "",
"legendFormat": "p95 - {{ method }}",
Expand Down Expand Up @@ -2188,7 +2188,7 @@
"uid": "${datasource}"
},
"exemplar": true,
"expr": "rate(promscale_database_requests_total{namespace=~\"$namespace\"}[$__rate_interval])",
"expr": "sum(rate(promscale_database_requests_total{namespace=~\"$namespace\"}[$__rate_interval])) by(method)",
"interval": "2m",
"legendFormat": "{{ method }}",
"range": true,
Expand Down Expand Up @@ -2745,7 +2745,7 @@
},
"editorMode": "code",
"exemplar": false,
"expr": "max by (job, instance)(rate(promscale_sql_database_worker_maintenance_job_metrics_compression_total_runs_count{namespace=~\"$namespace\"}[$__rate_interval])) * 60",
"expr": "max(rate(promscale_sql_database_worker_maintenance_job_metrics_compression_total_runs_count{namespace=~\"$namespace\"}[$__rate_interval])) * 60",
"interval": "",
"legendFormat": "metrics-compression-total",
"range": true,
Expand All @@ -2758,7 +2758,7 @@
},
"editorMode": "code",
"exemplar": false,
"expr": "max by (job, instance)(rate(promscale_sql_database_worker_maintenance_job_metrics_retention_total_runs_count{namespace=~\"$namespace\"}[$__rate_interval])) * 60",
"expr": "max(rate(promscale_sql_database_worker_maintenance_job_metrics_retention_total_runs_count{namespace=~\"$namespace\"}[$__rate_interval])) * 60",
"hide": false,
"interval": "",
"legendFormat": "metrics-retention-total",
Expand All @@ -2772,7 +2772,7 @@
},
"editorMode": "code",
"exemplar": false,
"expr": "max by (job, instance)(rate(promscale_sql_database_worker_maintenance_job_traces_retention_total_runs_count{namespace=~\"$namespace\"}[$__rate_interval])) * 60",
"expr": "max(rate(promscale_sql_database_worker_maintenance_job_traces_retention_total_runs_count{namespace=~\"$namespace\"}[$__rate_interval])) * 60",
"hide": false,
"interval": "",
"legendFormat": "traces-retention-total",
Expand All @@ -2786,7 +2786,7 @@
},
"editorMode": "code",
"exemplar": false,
"expr": "max by (job, instance)(rate(promscale_sql_database_worker_maintenance_job_metrics_compression_failures_count{namespace=~\"$namespace\"}[$__rate_interval])) * 60",
"expr": "max(rate(promscale_sql_database_worker_maintenance_job_metrics_compression_failures_count{namespace=~\"$namespace\"}[$__rate_interval])) * 60",
"hide": false,
"interval": "",
"legendFormat": "metrics-compression-failures",
Expand All @@ -2800,7 +2800,7 @@
},
"editorMode": "code",
"exemplar": false,
"expr": "max by (job, instance)(rate(promscale_sql_database_worker_maintenance_job_metrics_retention_failures_count{namespace=~\"$namespace\"}[$__rate_interval])) * 60",
"expr": "max(rate(promscale_sql_database_worker_maintenance_job_metrics_retention_failures_count{namespace=~\"$namespace\"}[$__rate_interval])) * 60",
"hide": false,
"interval": "",
"legendFormat": "metrics-retention-failures",
Expand All @@ -2814,7 +2814,7 @@
},
"editorMode": "code",
"exemplar": false,
"expr": "max by (job, instance)(rate(promscale_sql_database_worker_maintenance_job_traces_retention_failures_count{namespace=~\"$namespace\"}[$__rate_interval])) * 60",
"expr": "max(rate(promscale_sql_database_worker_maintenance_job_traces_retention_failures_count{namespace=~\"$namespace\"}[$__rate_interval])) * 60",
"hide": false,
"interval": "",
"legendFormat": "traces-retention-failures",
Expand All @@ -2828,7 +2828,7 @@
},
"editorMode": "code",
"exemplar": false,
"expr": "max by (job, instance)(rate(promscale_sql_database_worker_maintenance_job_traces_compression_failures_count{namespace=~\"$namespace\"}[$__rate_interval])) * 60",
"expr": "max(rate(promscale_sql_database_worker_maintenance_job_traces_compression_failures_count{namespace=~\"$namespace\"}[$__rate_interval])) * 60",
"hide": false,
"interval": "",
"legendFormat": "traces-compression-failures",
Expand All @@ -2842,7 +2842,7 @@
},
"editorMode": "code",
"exemplar": false,
"expr": "max by (job, instance)(rate(promscale_sql_database_worker_maintenance_job_traces_retention_total_runs_count{namespace=~\"$namespace\"}[$__rate_interval])) * 60",
"expr": "max(rate(promscale_sql_database_worker_maintenance_job_traces_retention_total_runs_count{namespace=~\"$namespace\"}[$__rate_interval])) * 60",
"hide": false,
"interval": "",
"legendFormat": "traces-compression-total",
Expand Down Expand Up @@ -3337,7 +3337,7 @@
},
"editorMode": "code",
"exemplar": true,
"expr": "rate(promscale_cache_query_hits_total{type=~\"$datatype\",namespace=~\"$namespace\"}[$__rate_interval]) / rate(promscale_cache_queries_total{type=~\"$datatype\",namespace=~\"$namespace\"}[$__rate_interval])",
"expr": "sum(rate(promscale_cache_query_hits_total{type=~\"$datatype\",namespace=~\"$namespace\"}[$__rate_interval])) by(name) / sum(rate(promscale_cache_queries_total{type=~\"$datatype\",namespace=~\"$namespace\"}[$__rate_interval])) by(name)",
"interval": "2m",
"legendFormat": "{{ name }}",
"range": true,
Expand Down Expand Up @@ -3434,7 +3434,7 @@
},
"editorMode": "code",
"exemplar": true,
"expr": "histogram_quantile(0.5, rate(promscale_cache_query_latency_microseconds_bucket{type=~\"$datatype\",namespace=~\"$namespace\"}[$__rate_interval]))",
"expr": "histogram_quantile(0.5, sum(rate(promscale_cache_query_latency_microseconds_bucket{type=~\"$datatype\",namespace=~\"$namespace\"}[$__rate_interval])) by(le, name))",
"interval": "2m",
"legendFormat": "p50 - {{ name }}",
"range": true,
Expand All @@ -3447,7 +3447,7 @@
},
"editorMode": "code",
"exemplar": true,
"expr": "histogram_quantile(0.90, rate(promscale_cache_query_latency_microseconds_bucket{type=~\"$datatype\",namespace=~\"$namespace\"}[$__rate_interval]))",
"expr": "histogram_quantile(0.90, sum(rate(promscale_cache_query_latency_microseconds_bucket{type=~\"$datatype\",namespace=~\"$namespace\"}[$__rate_interval])) by(le, name))",
"hide": false,
"interval": "2m",
"legendFormat": "p90 - {{ name }}",
Expand Down Expand Up @@ -3639,7 +3639,7 @@
},
"editorMode": "code",
"exemplar": true,
"expr": "promscale_cache_elements{namespace=~\"$namespace\",type=~\"$datatype\"} / promscale_cache_capacity_elements{namespace=~\"$namespace\",type=~\"$datatype\"}",
"expr": "sum(promscale_cache_elements{namespace=~\"$namespace\",type=~\"$datatype\"}) by(name) / sum(promscale_cache_capacity_elements{namespace=~\"$namespace\",type=~\"$datatype\"}) by(name)",
"interval": "",
"legendFormat": "{{ name }}",
"range": true,
Expand Down
Loading

0 comments on commit a26f1f2

Please sign in to comment.