es-ukrtb/config/prometheus/alert.rules

31 lines
808 B
Plaintext
Raw Normal View History

2024-01-30 11:46:58 +00:00
# Prometheus alerting rules: a single rule group containing three alerts.
# Each alert requires its expression to hold for 30s (`for: 30s`) and is
# tagged with the label `severity: page`.
groups:
  - name: example
    rules:
      # Fires for every series where `up == 0` has held for 30 seconds.
      # The annotations interpolate the `instance` and `job` labels of the
      # offending series.
      - alert: service_down
        expr: up == 0
        for: 30s
        labels:
          severity: page
        annotations:
          summary: "Instance {{ $labels.instance }} down"
          description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 30 seconds."

      # Fires for every series where `node_load1` has stayed above 0.8 for
      # 30 seconds.
      # NOTE(review): presumably node_load1 is the 1-minute load average and
      # is not normalized per CPU core; if so, 0.8 will trip on multi-core
      # hosts under ordinary load -- confirm the threshold is intended.
      - alert: high_load
        expr: node_load1 > 0.8
        for: 30s
        labels:
          severity: page
        annotations:
          summary: "Instance {{ $labels.instance }} under high load"
          description: "{{ $labels.instance }} of job {{ $labels.job }} is under high load."

      # Fires for every series where `probe_success` has been below 1 for
      # 30 seconds.
      # NOTE(review): presumably probe_success comes from a prober/blackbox
      # exporter and `instance` is the probed site -- verify against the
      # scrape configuration.
      - alert: site_down
        expr: probe_success < 1
        for: 30s
        labels:
          severity: page
        annotations:
          summary: "Site Down: {{$labels.instance}}"
          description: "Site Down: {{$labels.instance}} for more than 30 seconds"