@@ -5,12 +5,11 @@ metadata:
5
5
namespace : openshift-user-workload-monitoring
6
6
spec :
7
7
groups :
8
- - name : thanos-rule.rules
8
+ - name : thanos-rule
9
9
rules :
10
10
- alert : ThanosRuleQueueIsDroppingAlerts
11
11
annotations :
12
- description : Thanos Rule {{$labels.job}} {{$labels.pod}} is failing to queue
13
- alerts.
12
+ description : Thanos Rule {{$labels.job}} is failing to queue alerts.
14
13
summary : Thanos Rule is failing to queue alerts.
15
14
expr : |
16
15
sum by (job) (rate(thanos_alert_queue_alerts_dropped_total{job="thanos-ruler"}[5m])) > 0
19
18
severity : critical
20
19
- alert : ThanosRuleSenderIsFailingAlerts
21
20
annotations :
22
- description : Thanos Rule {{$labels.job}} {{$labels.pod}} is failing to send
23
- alerts to alertmanager.
21
+ description : Thanos Rule {{$labels.job}} is failing to send alerts to alertmanager.
24
22
summary : Thanos Rule is failing to send alerts to alertmanager.
25
23
expr : |
26
24
sum by (job) (rate(thanos_alert_sender_alerts_dropped_total{job="thanos-ruler"}[5m])) > 0
29
27
severity : critical
30
28
- alert : ThanosRuleHighRuleEvaluationFailures
31
29
annotations :
32
- description : Thanos Rule {{$labels.job}} {{$labels.pod}} is failing to evaluate
33
- rules.
30
+ description : Thanos Rule {{$labels.job}} is failing to evaluate rules.
34
31
summary : Thanos Rule is failing to evaluate rules.
35
32
expr : |
36
33
(
44
41
severity : critical
45
42
- alert : ThanosRuleHighRuleEvaluationWarnings
46
43
annotations :
47
- description : Thanos Rule {{$labels.job}} {{$labels.pod}} has high number of
48
- evaluation warnings.
44
+ description : Thanos Rule {{$labels.job}} has high number of evaluation warnings.
49
45
summary : Thanos Rule has high number of evaluation warnings.
50
46
expr : |
51
47
sum by (job) (rate(thanos_rule_evaluation_with_warnings_total{job="thanos-ruler"}[5m])) > 0
@@ -54,14 +50,14 @@ spec:
54
50
severity : info
55
51
- alert : ThanosRuleRuleEvaluationLatencyHigh
56
52
annotations :
57
- description : Thanos Rule {{$labels.job}}/{{$labels.pod }} has higher evaluation
53
+ description : Thanos Rule {{$labels.job}}/{{$labels.instance }} has higher evaluation
58
54
latency than interval for {{$labels.rule_group}}.
59
55
summary : Thanos Rule has high rule evaluation latency.
60
56
expr : |
61
57
(
62
- sum by (job, pod , rule_group) (prometheus_rule_group_last_duration_seconds{job="thanos-ruler"})
58
+ sum by (job, instance , rule_group) (prometheus_rule_group_last_duration_seconds{job="thanos-ruler"})
63
59
>
64
- sum by (job, pod , rule_group) (prometheus_rule_group_interval_seconds{job="thanos-ruler"})
60
+ sum by (job, instance , rule_group) (prometheus_rule_group_interval_seconds{job="thanos-ruler"})
65
61
)
66
62
for : 5m
67
63
labels :
0 commit comments