Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added a rule for a health check alert (#630) #689

Merged
merged 1 commit into from
Aug 12, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions .github/workflows/mainnet-deploy-to-vms.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,17 +25,17 @@ jobs:
shell: bash
run: |
echo "TAG=${{ github.event.client_payload.tag }}" > .env && \
echo "${{ secrets.ENV_READ_ONLY }}" >> .env
echo '${{ secrets.ENV_READ_ONLY }}' >> .env

- name: Export application config
shell: bash
run: |
echo "${{ secrets.APP_MAINNET_DAVE_CONFIG }}" > ./node.yml && \
echo "${{ secrets.APP_MAINNET_BRIDGE_CONFIG }}" > ./bridge.yml && \
echo "${{ secrets.APP_MAINNET_ALERT_CONFIG }}" > ./monitoring/alertmanager/config.yml && \
echo "${{ secrets.APP_MAINNET_GRAFANA_CONFIG_ENV }}" > ./monitoring/grafana/config-overrides.env && \
echo "${{ secrets.APP_MAINNET_PROMETHEUS_CONFIG }}" > ./monitoring/prometheus/prometheus.yml && \
echo "${{ secrets.APP_MAINNET_PROMETHEUS_RULES_CONFIG }}" > ./monitoring/prometheus/rules.yml
echo '${{ secrets.APP_MAINNET_DAVE_CONFIG }}' > ./node.yml && \
echo '${{ secrets.APP_MAINNET_BRIDGE_CONFIG }}' > ./bridge.yml && \
echo '${{ secrets.APP_MAINNET_ALERT_CONFIG }}' > ./monitoring/alertmanager/config.yml && \
echo '${{ secrets.APP_MAINNET_GRAFANA_CONFIG_ENV }}' > ./monitoring/grafana/config-overrides.env && \
echo '${{ secrets.APP_MAINNET_PROMETHEUS_CONFIG }}' > ./monitoring/prometheus/prometheus.yml && \
echo '${{ secrets.APP_MAINNET_PROMETHEUS_RULES_CONFIG }}' > ./monitoring/prometheus/rules.yml

- name: Copy files
run: |
Expand Down
40 changes: 20 additions & 20 deletions .github/workflows/testnet-deploy-to-vms.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,14 @@ jobs:
- name: Export docker .env
shell: bash
run: |
echo "TAG=${{ github.event.client_payload.tag }}" > .env && \
echo "${{ secrets.ENV_TESTNET }}" >> .env
echo 'TAG=${{ github.event.client_payload.tag }}' > .env && \
echo '${{ secrets.ENV_TESTNET }}' >> .env

- name: Export application config
shell: bash
run: |
echo "${{ secrets.APP_TESTNET_ALICE_CONFIG }}" > ./node.yml && \
echo "${{ secrets.APP_TESTNET_BRIDGE_CONFIG }}" > ./bridge.yml
echo '${{ secrets.APP_TESTNET_ALICE_CONFIG }}' > ./node.yml && \
echo '${{ secrets.APP_TESTNET_BRIDGE_CONFIG }}' > ./bridge.yml

- name: Copy files
run: |
Expand All @@ -58,14 +58,14 @@ jobs:
- name: Export docker .env
shell: bash
run: |
echo "TAG=${{ github.event.client_payload.tag }}" > .env && \
echo "${{ secrets.ENV_TESTNET }}" >> .env
echo 'TAG=${{ github.event.client_payload.tag }}' > .env && \
echo '${{ secrets.ENV_TESTNET }}' >> .env

- name: Export application config
shell: bash
run: |
echo "${{ secrets.APP_TESTNET_BOB_CONFIG }}" > ./node.yml && \
echo "${{ secrets.APP_TESTNET_BRIDGE_CONFIG }}" > ./bridge.yml
echo '${{ secrets.APP_TESTNET_BOB_CONFIG }}' > ./node.yml && \
echo '${{ secrets.APP_TESTNET_BRIDGE_CONFIG }}' > ./bridge.yml

- name: Copy files
run: |
Expand All @@ -92,14 +92,14 @@ jobs:
- name: Export docker .env
shell: bash
run: |
echo "TAG=${{ github.event.client_payload.tag }}" > .env && \
echo "${{ secrets.ENV_TESTNET }}" >> .env
echo 'TAG=${{ github.event.client_payload.tag }}' > .env && \
echo '${{ secrets.ENV_TESTNET }}' >> .env

- name: Export application config
shell: bash
run: |
echo "${{ secrets.APP_TESTNET_CAROL_CONFIG }}" > ./node.yml && \
echo "${{ secrets.APP_TESTNET_BRIDGE_CONFIG }}" > ./bridge.yml
echo '${{ secrets.APP_TESTNET_CAROL_CONFIG }}' > ./node.yml && \
echo '${{ secrets.APP_TESTNET_BRIDGE_CONFIG }}' > ./bridge.yml

- name: Copy files
run: |
Expand All @@ -126,18 +126,18 @@ jobs:
- name: Export docker .env
shell: bash
run: |
echo "TAG=${{ github.event.client_payload.tag }}" > .env && \
echo "${{ secrets.ENV_TESTNET }}" >> .env
echo 'TAG=${{ github.event.client_payload.tag }}' > .env && \
echo '${{ secrets.ENV_TESTNET }}' >> .env

- name: Export application config
shell: bash
run: |
echo "${{ secrets.APP_TESTNET_DAVE_CONFIG }}" > ./node.yml && \
echo "${{ secrets.APP_TESTNET_BRIDGE_CONFIG }}" > ./bridge.yml && \
echo "${{ secrets.APP_TESTNET_ALERT_CONFIG }}" > ./monitoring/alertmanager/config.yml && \
echo "${{ secrets.APP_TESTNET_GRAFANA_CONFIG_ENV }}" > ./monitoring/grafana/config-overrides.env && \
echo "${{ secrets.APP_TESTNET_PROMETHEUS_CONFIG }}" > ./monitoring/prometheus/prometheus.yml && \
echo "${{ secrets.APP_TESTNET_PROMETHEUS_RULES_CONFIG }}" > ./monitoring/prometheus/rules.yml
echo '${{ secrets.APP_TESTNET_DAVE_CONFIG }}' > ./node.yml && \
echo '${{ secrets.APP_TESTNET_BRIDGE_CONFIG }}' > ./bridge.yml && \
echo '${{ secrets.APP_TESTNET_ALERT_CONFIG }}' > ./monitoring/alertmanager/config.yml && \
echo '${{ secrets.APP_TESTNET_GRAFANA_CONFIG_ENV }}' > ./monitoring/grafana/config-overrides.env && \
echo '${{ secrets.APP_TESTNET_PROMETHEUS_CONFIG }}' > ./monitoring/prometheus/prometheus.yml && \
echo '${{ secrets.APP_TESTNET_PROMETHEUS_RULES_CONFIG }}' > ./monitoring/prometheus/rules.yml

- name: Copy files
run: |
Expand Down
48 changes: 40 additions & 8 deletions examples/three-validators/monitoring/prometheus/rules.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
# for: 1m
# # Labels - additional labels to be attached to the alert
# labels:
# severity: 'minor'
# group: 'account_amounts'
# severity: "minor"
# group: "account_amounts"
# annotations:
# description: "Fee account amount: {{ $value }} HBAR"
#
Expand All @@ -18,8 +18,8 @@
# for: 1m
# # Labels - additional labels to be attached to the alert
# labels:
# severity: 'minor'
# group: 'account_amounts'
# severity: "minor"
# group: "account_amounts"
# annotations:
# description: "Operator account amount: {{ $value }} HBAR"
#
Expand All @@ -31,8 +31,40 @@
# for: 1m
# # Labels - additional labels to be attached to the alert
# labels:
# severity: 'critical'
# group: 'validators'
# repeat_interval: 'long'
# severity: "critical"
# group: "validators"
# repeat_interval: "long"
# annotations:
# description: "Participation Rate: {{ $value }} %"
# description: "Participation Rate: {{ $value }} %"
# # Health-check rules. Set each threshold according to the total number of validators.
# - alert: HealthyValidatorsMinor
# # Condition for alerting
# expr: count_validators_alive < 3
# for: 1m
# # Labels - additional labels to be attached to the alert
# labels:
# severity: "minor"
# group: "validators"
# annotations:
# description: "Healthy validators: {{ $value }}"
# - alert: HealthyValidatorsWarning
# # Condition for alerting
# expr: count_validators_alive < 2
# for: 1m
# # Labels - additional labels to be attached to the alert
# labels:
# severity: "warning"
# group: "validators"
# annotations:
# description: "Healthy validators: {{ $value }}"
# - alert: HealthyValidatorsCritical
# # Condition for alerting
# expr: count_validators_alive < 1
# for: 1m
# # Labels - additional labels to be attached to the alert
# labels:
# severity: "critical"
# group: "validators"
# repeat_interval: "long"
# annotations:
# description: "Healthy validators: {{ $value }}"
52 changes: 42 additions & 10 deletions monitoring/prometheus/rules.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,21 +7,21 @@
# for: 1m
# # Labels - additional labels to be attached to the alert
# labels:
# severity: 'minor'
# group: 'account_amounts'
# severity: "minor"
# group: "account_amounts"
# annotations:
# description: 'Fee account amount: {{ $value }} HBAR'
# description: "Fee account amount: {{ $value }} HBAR"
#
# - alert: LowOperatorAccountAmount
# # Condition for alerting
# expr: operator_account_amount < 100
# for: 1m
# # Labels - additional labels to be attached to the alert
# labels:
# severity: 'minor'
# group: 'account_amounts'
# severity: "minor"
# group: "account_amounts"
# annotations:
# description: 'Operator account amount: {{ $value }} HBAR'
# description: "Operator account amount: {{ $value }} HBAR"
#
# - name: validators
# rules:
Expand All @@ -31,8 +31,40 @@
# for: 1m
# # Labels - additional labels to be attached to the alert
# labels:
# severity: 'critical'
# group: 'validators'
# repeat_interval: 'long'
# severity: "critical"
# group: "validators"
# repeat_interval: "long"
# annotations:
# description: 'Participation Rate: {{ $value }} %'
# description: "Participation Rate: {{ $value }} %"
# # Health-check rules. Set each threshold according to the total number of validators.
# - alert: HealthyValidatorsMinor
# # Condition for alerting
# expr: count_validators_alive < 9
# for: 1m
# # Labels - additional labels to be attached to the alert
# labels:
# severity: "minor"
# group: "validators"
# annotations:
# description: "Healthy validators: {{ $value }}"
# - alert: HealthyValidatorsWarning
# # Condition for alerting
# expr: count_validators_alive < 7
# for: 1m
# # Labels - additional labels to be attached to the alert
# labels:
# severity: "warning"
# group: "validators"
# annotations:
# description: "Healthy validators: {{ $value }}"
# - alert: HealthyValidatorsCritical
# # Condition for alerting
# expr: count_validators_alive < 5
# for: 1m
# # Labels - additional labels to be attached to the alert
# labels:
# severity: "critical"
# group: "validators"
# repeat_interval: "long"
# annotations:
# description: "Healthy validators: {{ $value }}"