File tree: 2 files changed, +32 -5 lines changed
Original file line number | Diff line number | Diff line change
@@ -248,25 +248,26 @@ spec:
248
248
summary : Thanos Sidecar cannot connect to Prometheus
249
249
expr : |
250
250
sum by (job, instance) (thanos_sidecar_prometheus_up{job=~"prometheus-(k8s|user-workload)-thanos-sidecar"} == 0)
251
- for : 5m
251
+ for : 1h
252
252
labels :
253
- severity : critical
253
+ severity : warning
254
254
- alert : ThanosSidecarBucketOperationsFailed
255
255
annotations :
256
256
description : Thanos Sidecar {{$labels.job}} {{$labels.instance}} bucket operations
257
257
are failing
258
258
summary : Thanos Sidecar bucket operations are failing
259
259
expr : |
260
260
rate(thanos_objstore_bucket_operation_failures_total{job=~"prometheus-(k8s|user-workload)-thanos-sidecar"}[5m]) > 0
261
- for : 5m
261
+ for : 1h
262
262
labels :
263
- severity : critical
263
+ severity : warning
264
264
- alert : ThanosSidecarUnhealthy
265
265
annotations :
266
266
description : Thanos Sidecar {{$labels.job}} {{$labels.pod}} is unhealthy for
267
267
more than {{ $value }} seconds.
268
268
summary : Thanos Sidecar is unhealthy.
269
269
expr : |
270
270
time() - max(timestamp(thanos_sidecar_last_heartbeat_success_time_seconds{job=~"prometheus-(k8s|user-workload)-thanos-sidecar"})) by (job,pod) >= 240
271
+ for : 1h
271
272
labels :
272
- severity : critical
273
+ severity : warning
Original file line number | Diff line number | Diff line change
@@ -74,6 +74,32 @@ local patchedRules = [
74
74
},
75
75
],
76
76
},
77
+ {
78
+ name: 'thanos-sidecar' ,
79
+ rules: [
80
+ {
81
+ alert: 'ThanosSidecarPrometheusDown' ,
82
+ 'for' : '1h' ,
83
+ labels: {
84
+ severity: 'warning' ,
85
+ },
86
+ },
87
+ {
88
+ alert: 'ThanosSidecarBucketOperationsFailed' ,
89
+ 'for' : '1h' ,
90
+ labels: {
91
+ severity: 'warning' ,
92
+ },
93
+ },
94
+ {
95
+ alert: 'ThanosSidecarUnhealthy' ,
96
+ 'for' : '1h' ,
97
+ labels: {
98
+ severity: 'warning' ,
99
+ },
100
+ },
101
+ ],
102
+ },
77
103
];
78
104
79
105
local patchOrExcludeRule(rule, ruleSet, operation) =
You can’t perform that action at this time.
0 commit comments