Skip to content

Commit c97c6f7

Browse files
Merge pull request #1637 from simonpasquier/bz2033575
Bug 2033575: use bearer token as fall-back authn method
2 parents de21d1e + f661081 commit c97c6f7

10 files changed

+76
-26
lines changed

Diff for: assets/alertmanager/service-monitor.yaml

+2-1
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,8 @@ metadata:
1111
namespace: openshift-monitoring
1212
spec:
1313
endpoints:
14-
- interval: 30s
14+
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
15+
interval: 30s
1516
port: metrics
1617
scheme: https
1718
tlsConfig:

Diff for: assets/grafana/service-monitor.yaml

+2-1
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,8 @@ metadata:
1010
namespace: openshift-monitoring
1111
spec:
1212
endpoints:
13-
- interval: 30s
13+
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
14+
interval: 30s
1415
port: metrics
1516
scheme: https
1617
tlsConfig:

Diff for: assets/prometheus-k8s/service-monitor-thanos-sidecar.yaml

+2-1
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,8 @@ metadata:
1111
namespace: openshift-monitoring
1212
spec:
1313
endpoints:
14-
- interval: 30s
14+
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
15+
interval: 30s
1516
port: thanos-proxy
1617
scheme: https
1718
tlsConfig:

Diff for: assets/prometheus-k8s/service-monitor.yaml

+2-1
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,8 @@ metadata:
1111
namespace: openshift-monitoring
1212
spec:
1313
endpoints:
14-
- interval: 30s
14+
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
15+
interval: 30s
1516
port: metrics
1617
scheme: https
1718
tlsConfig:

Diff for: assets/prometheus-user-workload/service-monitor-thanos-sidecar.yaml

+2-1
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,8 @@ metadata:
1111
namespace: openshift-user-workload-monitoring
1212
spec:
1313
endpoints:
14-
- interval: 30s
14+
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
15+
interval: 30s
1516
port: thanos-proxy
1617
scheme: https
1718
tlsConfig:

Diff for: assets/prometheus-user-workload/service-monitor.yaml

+2-1
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,8 @@ metadata:
1111
namespace: openshift-user-workload-monitoring
1212
spec:
1313
endpoints:
14-
- interval: 30s
14+
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
15+
interval: 30s
1516
port: metrics
1617
scheme: https
1718
tlsConfig:

Diff for: assets/telemeter-client/service-monitor.yaml

+2-1
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,8 @@ metadata:
77
namespace: openshift-monitoring
88
spec:
99
endpoints:
10-
- interval: 30s
10+
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
11+
interval: 30s
1112
port: https
1213
scheme: https
1314
tlsConfig:

Diff for: assets/thanos-querier/service-monitor.yaml

+2-1
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,8 @@ metadata:
1111
namespace: openshift-monitoring
1212
spec:
1313
endpoints:
14-
- interval: 30s
14+
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
15+
interval: 30s
1516
port: metrics
1617
scheme: https
1718
tlsConfig:

Diff for: jsonnet/main.jsonnet

+41-18
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@ local removeLimits = (import './utils/remove-limits.libsonnet').removeLimits;
22
local addAnnotations = (import './utils/add-annotations.libsonnet').addAnnotations;
33
local sanitizeAlertRules = (import './utils/sanitize-rules.libsonnet').sanitizeAlertRules;
44
local removeNetworkPolicy = (import './utils/remove-network-policy.libsonnet').removeNetworkPolicy;
5+
local addBearerTokenToServiceMonitors = (import './utils/add-bearer-token-to-service-monitors.libsonnet').addBearerTokenToServiceMonitors;
56

67
local alertmanager = import './components/alertmanager.libsonnet';
78
local grafana = import './components/grafana.libsonnet';
@@ -441,22 +442,44 @@ local userWorkload =
441442

442443
// Manifestation
443444
sanitizeAlertRules(addAnnotations(removeLimits(removeNetworkPolicy(
444-
{ ['alertmanager/' + name]: inCluster.alertmanager[name] for name in std.objectFields(inCluster.alertmanager) } +
445-
{ ['cluster-monitoring-operator/' + name]: inCluster.clusterMonitoringOperator[name] for name in std.objectFields(inCluster.clusterMonitoringOperator) } +
446-
{ ['grafana/' + name]: inCluster.grafana[name] for name in std.objectFields(inCluster.grafana) } +
447-
{ ['kube-state-metrics/' + name]: inCluster.kubeStateMetrics[name] for name in std.objectFields(inCluster.kubeStateMetrics) } +
448-
{ ['node-exporter/' + name]: inCluster.nodeExporter[name] for name in std.objectFields(inCluster.nodeExporter) } +
449-
{ ['openshift-state-metrics/' + name]: inCluster.openshiftStateMetrics[name] for name in std.objectFields(inCluster.openshiftStateMetrics) } +
450-
{ ['prometheus-k8s/' + name]: inCluster.prometheus[name] for name in std.objectFields(inCluster.prometheus) } +
451-
{ ['prometheus-operator/' + name]: inCluster.prometheusOperator[name] for name in std.objectFields(inCluster.prometheusOperator) } +
452-
{ ['prometheus-operator-user-workload/' + name]: userWorkload.prometheusOperator[name] for name in std.objectFields(userWorkload.prometheusOperator) } +
453-
{ ['prometheus-user-workload/' + name]: userWorkload.prometheus[name] for name in std.objectFields(userWorkload.prometheus) } +
454-
{ ['prometheus-adapter/' + name]: inCluster.prometheusAdapter[name] for name in std.objectFields(inCluster.prometheusAdapter) } +
455-
// needs to be removed once remote-write is allowed for sending telemetry
456-
{ ['telemeter-client/' + name]: inCluster.telemeterClient[name] for name in std.objectFields(inCluster.telemeterClient) } +
457-
{ ['thanos-querier/' + name]: inCluster.thanosQuerier[name] for name in std.objectFields(inCluster.thanosQuerier) } +
458-
{ ['thanos-ruler/' + name]: inCluster.thanosRuler[name] for name in std.objectFields(inCluster.thanosRuler) } +
459-
{ ['control-plane/' + name]: inCluster.controlPlane[name] for name in std.objectFields(inCluster.controlPlane) } +
460-
{ ['manifests/' + name]: inCluster.manifests[name] for name in std.objectFields(inCluster.manifests) } +
461-
{}
445+
// When the TLS certificate used for authentication gets rotated, Prometheus
446+
// doesn't pick up the new certificate until the connection to the target is
447+
// re-established. Because Prometheus uses keep-alive HTTP connections, the
448+
// consequence is that the scrapes start failing after about 1 day and the
449+
// TargetDown alert fires. To resolve the alert, the cluster admin has to
450+
// restart the pods being reported as down.
451+
//
452+
// To workaround the issue (and until Prometheus properly handles certificate
453+
// rotation), patch the service monitors in the openshift-monitoring and
454+
// openshift-user-workload-monitoring namespaces with fall-back authentication
455+
// method using the service account bearer token.
456+
//
457+
// Details in:
458+
// https://github.com/prometheus/prometheus/issues/9512 (upstream)
459+
// https://bugzilla.redhat.com/show_bug.cgi?id=2033575
460+
//
461+
// TODO(simonpasquier): once Prometheus issue #9512 is fixed downstream,
462+
// replace addBearerTokenToServiceMonitors() by
463+
// removeBearerTokenFromServiceMonitors() to ensure that all service monitors
464+
// use only TLS for authentication.
465+
addBearerTokenToServiceMonitors(
466+
{ ['alertmanager/' + name]: inCluster.alertmanager[name] for name in std.objectFields(inCluster.alertmanager) } +
467+
{ ['cluster-monitoring-operator/' + name]: inCluster.clusterMonitoringOperator[name] for name in std.objectFields(inCluster.clusterMonitoringOperator) } +
468+
{ ['grafana/' + name]: inCluster.grafana[name] for name in std.objectFields(inCluster.grafana) } +
469+
{ ['kube-state-metrics/' + name]: inCluster.kubeStateMetrics[name] for name in std.objectFields(inCluster.kubeStateMetrics) } +
470+
{ ['node-exporter/' + name]: inCluster.nodeExporter[name] for name in std.objectFields(inCluster.nodeExporter) } +
471+
{ ['openshift-state-metrics/' + name]: inCluster.openshiftStateMetrics[name] for name in std.objectFields(inCluster.openshiftStateMetrics) } +
472+
{ ['prometheus-k8s/' + name]: inCluster.prometheus[name] for name in std.objectFields(inCluster.prometheus) } +
473+
{ ['prometheus-operator/' + name]: inCluster.prometheusOperator[name] for name in std.objectFields(inCluster.prometheusOperator) } +
474+
{ ['prometheus-operator-user-workload/' + name]: userWorkload.prometheusOperator[name] for name in std.objectFields(userWorkload.prometheusOperator) } +
475+
{ ['prometheus-user-workload/' + name]: userWorkload.prometheus[name] for name in std.objectFields(userWorkload.prometheus) } +
476+
{ ['prometheus-adapter/' + name]: inCluster.prometheusAdapter[name] for name in std.objectFields(inCluster.prometheusAdapter) } +
477+
// needs to be removed once remote-write is allowed for sending telemetry
478+
{ ['telemeter-client/' + name]: inCluster.telemeterClient[name] for name in std.objectFields(inCluster.telemeterClient) } +
479+
{ ['thanos-querier/' + name]: inCluster.thanosQuerier[name] for name in std.objectFields(inCluster.thanosQuerier) } +
480+
{ ['thanos-ruler/' + name]: inCluster.thanosRuler[name] for name in std.objectFields(inCluster.thanosRuler) } +
481+
{ ['control-plane/' + name]: inCluster.controlPlane[name] for name in std.objectFields(inCluster.controlPlane) } +
482+
{ ['manifests/' + name]: inCluster.manifests[name] for name in std.objectFields(inCluster.manifests) } +
483+
{}
484+
)
462485
))))
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,19 @@
1+
{
2+
addBearerTokenToServiceMonitors(o): {
3+
local addBearerToken(o) = o {
4+
[if o.kind == 'ServiceMonitor' && o.metadata.name != 'etcd' then 'spec']+: {
5+
endpoints: [
6+
if std.objectHas(e, 'scheme') && e.scheme == 'https' then
7+
e {
8+
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
9+
}
10+
else
11+
e
12+
for e in super.endpoints
13+
],
14+
},
15+
},
16+
[k]: addBearerToken(o[k])
17+
for k in std.objectFieldsAll(o)
18+
},
19+
}

0 commit comments

Comments (0)