Skip to content

Commit 2568895

Browse files
committed
jsonnet: remove etcd gRPC calls failed alerts
backport openshift#340
1 parent 0db0ca7 commit 2568895

File tree

3 files changed

+16
-26
lines changed

3 files changed

+16
-26
lines changed

Diff for: assets/prometheus-k8s/rules.yaml

-24
Original file line numberDiff line numberDiff line change
@@ -1113,30 +1113,6 @@ spec:
11131113
for: 15m
11141114
labels:
11151115
severity: warning
1116-
- alert: EtcdHighNumberOfFailedGRPCRequests
1117-
annotations:
1118-
message: 'Etcd cluster "{{ $labels.job }}": {{ $value }}% of requests for
1119-
{{ $labels.grpc_method }} failed on etcd instance {{ $labels.instance }}.'
1120-
expr: |
1121-
100 * sum(rate(grpc_server_handled_total{job=~".*etcd.*", grpc_code!="OK"}[5m])) BY (job, instance, grpc_service, grpc_method)
1122-
/
1123-
sum(rate(grpc_server_handled_total{job=~".*etcd.*"}[5m])) BY (job, instance, grpc_service, grpc_method)
1124-
> 1
1125-
for: 10m
1126-
labels:
1127-
severity: warning
1128-
- alert: EtcdHighNumberOfFailedGRPCRequests
1129-
annotations:
1130-
message: 'Etcd cluster "{{ $labels.job }}": {{ $value }}% of requests for
1131-
{{ $labels.grpc_method }} failed on etcd instance {{ $labels.instance }}.'
1132-
expr: |
1133-
100 * sum(rate(grpc_server_handled_total{job=~".*etcd.*", grpc_code!="OK"}[5m])) BY (job, instance, grpc_service, grpc_method)
1134-
/
1135-
sum(rate(grpc_server_handled_total{job=~".*etcd.*"}[5m])) BY (job, instance, grpc_service, grpc_method)
1136-
> 5
1137-
for: 5m
1138-
labels:
1139-
severity: critical
11401116
- alert: EtcdGRPCRequestsSlow
11411117
annotations:
11421118
message: 'Etcd cluster "{{ $labels.job }}": gRPC requests to {{ $labels.grpc_method

Diff for: jsonnet/main.jsonnet

+14
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,20 @@ local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') +
156156
rule
157157
);
158158
utils.mapRuleGroups(replaceKubePodCrashLoopingExpression),
159+
} + {
160+
// Remove constantly firing etcd gRPC alerts.
161+
// https://bugzilla.redhat.com/show_bug.cgi?id=1717398
162+
prometheusAlerts+:: {
163+
groups:
164+
std.map(
165+
function(ruleGroup)
166+
if ruleGroup.name == 'etcd' then
167+
ruleGroup { rules: std.filter(function(rule) !('alert' in rule && rule.alert == 'EtcdHighNumberOfFailedGRPCRequests'), ruleGroup.rules) }
168+
else
169+
ruleGroup,
170+
super.groups,
171+
),
172+
},
159173
};
160174

161175
{ ['prometheus-operator/' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } +

0 commit comments

Comments
 (0)