Skip to content

Commit 9070ffb

Browse files
Merge pull request #1341 from simonpasquier/bz1933847
Bug 1933847: enable hard affinity + PodDisruptionBudget for Prometheus and Thanos Ruler pods
2 parents 227ad30 + fc854a5 commit 9070ffb

17 files changed

+240
-260
lines changed
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,18 @@
1+
apiVersion: policy/v1
2+
kind: PodDisruptionBudget
3+
metadata:
4+
labels:
5+
app.kubernetes.io/component: prometheus
6+
app.kubernetes.io/name: prometheus
7+
app.kubernetes.io/part-of: openshift-monitoring
8+
app.kubernetes.io/version: 2.30.3
9+
name: prometheus-k8s
10+
namespace: openshift-monitoring
11+
spec:
12+
minAvailable: 1
13+
selector:
14+
matchLabels:
15+
app.kubernetes.io/component: prometheus
16+
app.kubernetes.io/name: prometheus
17+
app.kubernetes.io/part-of: openshift-monitoring
18+
prometheus: k8s

assets/prometheus-k8s/prometheus.yaml

+10-12
Original file line number | Diff line number | Diff line change
@@ -12,18 +12,16 @@ metadata:
1212
spec:
1313
affinity:
1414
podAntiAffinity:
15-
preferredDuringSchedulingIgnoredDuringExecution:
16-
- podAffinityTerm:
17-
labelSelector:
18-
matchLabels:
19-
app.kubernetes.io/component: prometheus
20-
app.kubernetes.io/name: prometheus
21-
app.kubernetes.io/part-of: openshift-monitoring
22-
prometheus: k8s
23-
namespaces:
24-
- openshift-monitoring
25-
topologyKey: kubernetes.io/hostname
26-
weight: 100
15+
requiredDuringSchedulingIgnoredDuringExecution:
16+
- labelSelector:
17+
matchLabels:
18+
app.kubernetes.io/component: prometheus
19+
app.kubernetes.io/name: prometheus
20+
app.kubernetes.io/part-of: openshift-monitoring
21+
prometheus: k8s
22+
namespaces:
23+
- openshift-monitoring
24+
topologyKey: kubernetes.io/hostname
2725
alerting:
2826
alertmanagers:
2927
- apiVersion: v2
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,18 @@
1+
apiVersion: policy/v1
2+
kind: PodDisruptionBudget
3+
metadata:
4+
labels:
5+
app.kubernetes.io/component: prometheus
6+
app.kubernetes.io/name: prometheus
7+
app.kubernetes.io/part-of: openshift-monitoring
8+
app.kubernetes.io/version: 2.30.3
9+
name: prometheus-user-workload
10+
namespace: openshift-user-workload-monitoring
11+
spec:
12+
minAvailable: 1
13+
selector:
14+
matchLabels:
15+
app.kubernetes.io/component: prometheus
16+
app.kubernetes.io/name: prometheus
17+
app.kubernetes.io/part-of: openshift-monitoring
18+
prometheus: user-workload

assets/prometheus-user-workload/prometheus.yaml

+10-12
Original file line number | Diff line number | Diff line change
@@ -12,18 +12,16 @@ metadata:
1212
spec:
1313
affinity:
1414
podAntiAffinity:
15-
preferredDuringSchedulingIgnoredDuringExecution:
16-
- podAffinityTerm:
17-
labelSelector:
18-
matchLabels:
19-
app.kubernetes.io/component: prometheus
20-
app.kubernetes.io/name: prometheus
21-
app.kubernetes.io/part-of: openshift-monitoring
22-
prometheus: user-workload
23-
namespaces:
24-
- openshift-user-workload-monitoring
25-
topologyKey: kubernetes.io/hostname
26-
weight: 100
15+
requiredDuringSchedulingIgnoredDuringExecution:
16+
- labelSelector:
17+
matchLabels:
18+
app.kubernetes.io/component: prometheus
19+
app.kubernetes.io/name: prometheus
20+
app.kubernetes.io/part-of: openshift-monitoring
21+
prometheus: user-workload
22+
namespaces:
23+
- openshift-user-workload-monitoring
24+
topologyKey: kubernetes.io/hostname
2725
alerting:
2826
alertmanagers:
2927
- apiVersion: v2
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,13 @@
1+
apiVersion: policy/v1
2+
kind: PodDisruptionBudget
3+
metadata:
4+
labels:
5+
thanosRulerName: user-workload
6+
name: thanos-ruler-user-workload
7+
namespace: openshift-user-workload-monitoring
8+
spec:
9+
minAvailable: 1
10+
selector:
11+
matchLabels:
12+
app.kubernetes.io/name: thanos-ruler
13+
thanos-ruler: user-workload

assets/thanos-ruler/thanos-ruler.yaml

+10
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,16 @@ metadata:
66
name: user-workload
77
namespace: openshift-user-workload-monitoring
88
spec:
9+
affinity:
10+
podAntiAffinity:
11+
requiredDuringSchedulingIgnoredDuringExecution:
12+
- labelSelector:
13+
matchLabels:
14+
app.kubernetes.io/name: thanos-ruler
15+
thanos-ruler: user-workload
16+
namespaces:
17+
- openshift-user-workload-monitoring
18+
topologyKey: kubernetes.io/hostname
919
alertmanagersConfig:
1020
key: alertmanagers.yaml
1121
name: thanos-ruler-alertmanagers-config

jsonnet/components/prometheus-user-workload.libsonnet

+5-3
Original file line number | Diff line number | Diff line change
@@ -365,7 +365,9 @@ function(params)
365365
],
366366
},
367367
},
368-
// Removing PDB since it doesn't allow cluster upgrade when hard pod anti affinity is not set https://github.com/openshift/cluster-monitoring-operator/pull/1198
369-
// Review hard anti-affinity changes and then we can add back PDB
370-
podDisruptionBudget:: {},
368+
369+
// TODO: remove podDisruptionBudget once https://github.com/prometheus-operator/kube-prometheus/pull/1156 is merged
370+
podDisruptionBudget+: {
371+
apiVersion: 'policy/v1',
372+
},
371373
}

jsonnet/components/prometheus.libsonnet

+5-3
Original file line number | Diff line number | Diff line change
@@ -496,7 +496,9 @@ function(params)
496496
],
497497
},
498498
},
499-
// Removing PDB since it doesn't allow cluster upgrade when hard pod anti affinity is not set https://github.com/openshift/cluster-monitoring-operator/pull/1198
500-
// Review hard anti-affinity changes and then we can add back PDB
501-
podDisruptionBudget:: {},
499+
500+
// TODO: remove podDisruptionBudget once https://github.com/prometheus-operator/kube-prometheus/pull/1156 is merged
501+
podDisruptionBudget+: {
502+
apiVersion: 'policy/v1',
503+
},
502504
}

jsonnet/components/thanos-ruler.libsonnet

+29
Original file line number | Diff line number | Diff line change
@@ -315,6 +315,17 @@ function(params) {
315315
},
316316
},
317317
spec: {
318+
affinity: {
319+
podAntiAffinity: {
320+
requiredDuringSchedulingIgnoredDuringExecution: [{
321+
labelSelector: {
322+
matchLabels: cfg.selectorLabels,
323+
},
324+
namespaces: [cfg.namespace],
325+
topologyKey: 'kubernetes.io/hostname',
326+
}],
327+
},
328+
},
318329
securityContext: {
319330
fsGroup: 65534,
320331
runAsNonRoot: true,
@@ -459,6 +470,24 @@ function(params) {
459470
},
460471
},
461472

473+
podDisruptionBudget: {
474+
apiVersion: 'policy/v1',
475+
kind: 'PodDisruptionBudget',
476+
metadata: {
477+
name: 'thanos-ruler-' + cfg.name,
478+
namespace: cfg.namespace,
479+
labels: {
480+
thanosRulerName: cfg.name,
481+
},
482+
},
483+
spec: {
484+
minAvailable: 1,
485+
selector: {
486+
matchLabels: cfg.selectorLabels,
487+
},
488+
},
489+
},
490+
462491
// statefulSet from kube-thanos is not needed because thanosruler custom resource
463492
// is used instead.
464493
//statefulSet:: {},

jsonnet/utils/anti-affinity.libsonnet

+1-1
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@ local addon = import 'github.com/prometheus-operator/kube-prometheus/jsonnet/kub
33
addon {
44
values+:: {
55
prometheus+: {
6-
podAntiAffinity: 'soft',
6+
podAntiAffinity: 'hard',
77
},
88
prometheusAdapter+: {
99
podAntiAffinity: 'hard',

pkg/manifests/manifests.go

+15
Original file line number | Diff line number | Diff line change
@@ -121,6 +121,7 @@ var (
121121
PrometheusK8sTrustedCABundle = "prometheus-k8s/trusted-ca-bundle.yaml"
122122
PrometheusK8sThanosSidecarServiceMonitor = "prometheus-k8s/service-monitor-thanos-sidecar.yaml"
123123
PrometheusK8sTAlertmanagerRoleBinding = "prometheus-k8s/alertmanager-role-binding.yaml"
124+
PrometheusK8sPodDisruptionBudget = "prometheus-k8s/pod-disruption-budget.yaml"
124125

125126
PrometheusUserWorkloadServingCertsCABundle = "prometheus-user-workload/serving-certs-ca-bundle.yaml"
126127
PrometheusUserWorkloadServiceAccount = "prometheus-user-workload/service-account.yaml"
@@ -137,6 +138,7 @@ var (
137138
PrometheusUserWorkloadGrpcTLSSecret = "prometheus-user-workload/grpc-tls-secret.yaml"
138139
PrometheusUserWorkloadThanosSidecarServiceMonitor = "prometheus-user-workload/service-monitor-thanos-sidecar.yaml"
139140
PrometheusUserWorkloadAlertmanagerRoleBinding = "prometheus-user-workload/alertmanager-role-binding.yaml"
141+
PrometheusUserWorkloadPodDisruptionBudget = "prometheus-user-workload/pod-disruption-budget.yaml"
140142

141143
PrometheusAdapterAPIService = "prometheus-adapter/api-service.yaml"
142144
PrometheusAdapterClusterRole = "prometheus-adapter/cluster-role.yaml"
@@ -246,6 +248,7 @@ var (
246248
ThanosRulerServiceMonitor = "thanos-ruler/service-monitor.yaml"
247249
ThanosRulerPrometheusRule = "thanos-ruler/thanos-ruler-prometheus-rule.yaml"
248250
ThanosRulerAlertmanagerRoleBinding = "thanos-ruler/alertmanager-role-binding.yaml"
251+
ThanosRulerPodDisruptionBudget = "thanos-ruler/pod-disruption-budget.yaml"
249252

250253
TelemeterTrustedCABundle = "telemeter-client/trusted-ca-bundle.yaml"
251254

@@ -2307,6 +2310,18 @@ func (f *Factory) PrometheusK8sServiceThanosSidecar() (*v1.Service, error) {
23072310
return s, nil
23082311
}
23092312

2313+
func (f *Factory) PrometheusK8sPodDisruptionBudget() (*policyv1.PodDisruptionBudget, error) {
2314+
return f.NewPodDisruptionBudget(f.assets.MustNewAssetReader(PrometheusK8sPodDisruptionBudget))
2315+
}
2316+
2317+
func (f *Factory) PrometheusUserWorkloadPodDisruptionBudget() (*policyv1.PodDisruptionBudget, error) {
2318+
return f.NewPodDisruptionBudget(f.assets.MustNewAssetReader(PrometheusUserWorkloadPodDisruptionBudget))
2319+
}
2320+
2321+
func (f *Factory) ThanosRulerPodDisruptionBudget() (*policyv1.PodDisruptionBudget, error) {
2322+
return f.NewPodDisruptionBudget(f.assets.MustNewAssetReader(ThanosRulerPodDisruptionBudget))
2323+
}
2324+
23102325
func (f *Factory) PrometheusUserWorkloadService() (*v1.Service, error) {
23112326
s, err := f.NewService(f.assets.MustNewAssetReader(PrometheusUserWorkloadService))
23122327
if err != nil {

pkg/manifests/manifests_test.go

+42
Original file line number | Diff line number | Diff line change
@@ -2570,6 +2570,20 @@ func TestPodDisruptionBudget(t *testing.T) {
25702570
getPDB func(f *Factory) (*policyv1.PodDisruptionBudget, error)
25712571
ha bool
25722572
}{
2573+
{
2574+
name: "PrometheusK8s HA",
2575+
getPDB: func(f *Factory) (*policyv1.PodDisruptionBudget, error) {
2576+
return f.PrometheusK8sPodDisruptionBudget()
2577+
},
2578+
ha: true,
2579+
},
2580+
{
2581+
name: "PrometheusK8s non-HA",
2582+
getPDB: func(f *Factory) (*policyv1.PodDisruptionBudget, error) {
2583+
return f.PrometheusK8sPodDisruptionBudget()
2584+
},
2585+
ha: false,
2586+
},
25732587
{
25742588
name: "PrometheusAdapter HA",
25752589
getPDB: func(f *Factory) (*policyv1.PodDisruptionBudget, error) {
@@ -2598,6 +2612,34 @@ func TestPodDisruptionBudget(t *testing.T) {
25982612
},
25992613
ha: false,
26002614
},
2615+
{
2616+
name: "PrometheusUWM HA",
2617+
getPDB: func(f *Factory) (*policyv1.PodDisruptionBudget, error) {
2618+
return f.PrometheusUserWorkloadPodDisruptionBudget()
2619+
},
2620+
ha: true,
2621+
},
2622+
{
2623+
name: "PrometheusUWM non-HA",
2624+
getPDB: func(f *Factory) (*policyv1.PodDisruptionBudget, error) {
2625+
return f.PrometheusUserWorkloadPodDisruptionBudget()
2626+
},
2627+
ha: false,
2628+
},
2629+
{
2630+
name: "ThanosRuler HA",
2631+
getPDB: func(f *Factory) (*policyv1.PodDisruptionBudget, error) {
2632+
return f.ThanosRulerPodDisruptionBudget()
2633+
},
2634+
ha: true,
2635+
},
2636+
{
2637+
name: "ThanosRuler non-HA",
2638+
getPDB: func(f *Factory) (*policyv1.PodDisruptionBudget, error) {
2639+
return f.ThanosRulerPodDisruptionBudget()
2640+
},
2641+
ha: false,
2642+
},
26012643
}
26022644

26032645
for _, tc := range tests {

pkg/tasks/prometheus.go

+14
Original file line number | Diff line number | Diff line change
@@ -309,6 +309,20 @@ func (t *PrometheusTask) Run(ctx context.Context) error {
309309
return errors.Wrap(err, "error creating Prometheus Client GRPC TLS secret")
310310
}
311311

312+
{
313+
pdb, err := t.factory.PrometheusK8sPodDisruptionBudget()
314+
if err != nil {
315+
return errors.Wrap(err, "initializing Prometheus PodDisruptionBudget object failed")
316+
}
317+
318+
if pdb != nil {
319+
err = t.client.CreateOrUpdatePodDisruptionBudget(ctx, pdb)
320+
if err != nil {
321+
return errors.Wrap(err, "reconciling Prometheus PodDisruptionBudget object failed")
322+
}
323+
}
324+
}
325+
312326
{
313327
// Create trusted CA bundle ConfigMap.
314328
trustedCA, err := t.factory.PrometheusK8sTrustedCABundle()

pkg/tasks/prometheus_user_workload.go

+24
Original file line number | Diff line number | Diff line change
@@ -208,6 +208,18 @@ func (t *PrometheusUserWorkloadTask) create(ctx context.Context) error {
208208
return errors.Wrap(err, "reconciling UserWorkload Prometheus additionalAlertmanagerConfigs secret failed")
209209
}
210210

211+
pdb, err := t.factory.PrometheusUserWorkloadPodDisruptionBudget()
212+
if err != nil {
213+
return errors.Wrap(err, "initializing UserWorkload Prometheus PodDisruptionBudget object failed")
214+
}
215+
216+
if pdb != nil {
217+
err = t.client.CreateOrUpdatePodDisruptionBudget(ctx, pdb)
218+
if err != nil {
219+
return errors.Wrap(err, "reconciling UserWorkload Prometheus PodDisruptionBudget object failed")
220+
}
221+
}
222+
211223
klog.V(4).Info("initializing UserWorkload Prometheus object")
212224
p, err := t.factory.PrometheusUserWorkload(s)
213225
if err != nil {
@@ -291,6 +303,18 @@ func (t *PrometheusUserWorkloadTask) destroy(ctx context.Context) error {
291303
"server.key", string(grpcTLS.Data["prometheus-server.key"]),
292304
)
293305

306+
pdb, err := t.factory.PrometheusUserWorkloadPodDisruptionBudget()
307+
if err != nil {
308+
return errors.Wrap(err, "initializing UserWorkload Prometheus PodDisruptionBudget object failed")
309+
}
310+
311+
if pdb != nil {
312+
err = t.client.DeletePodDisruptionBudget(ctx, pdb)
313+
if err != nil {
314+
return errors.Wrap(err, "deleting UserWorkload Prometheus PodDisruptionBudget object failed")
315+
}
316+
}
317+
294318
p, err := t.factory.PrometheusUserWorkload(s)
295319
if err != nil {
296320
return errors.Wrap(err, "initializing UserWorkload Prometheus object failed")

0 commit comments

Comments
 (0)