-  { ['alertmanager/' + name]: inCluster.alertmanager[name] for name in std.objectFields(inCluster.alertmanager) } +
-  { ['cluster-monitoring-operator/' + name]: inCluster.clusterMonitoringOperator[name] for name in std.objectFields(inCluster.clusterMonitoringOperator) } +
-  { ['grafana/' + name]: inCluster.grafana[name] for name in std.objectFields(inCluster.grafana) } +
-  { ['kube-state-metrics/' + name]: inCluster.kubeStateMetrics[name] for name in std.objectFields(inCluster.kubeStateMetrics) } +
-  { ['node-exporter/' + name]: inCluster.nodeExporter[name] for name in std.objectFields(inCluster.nodeExporter) } +
-  { ['openshift-state-metrics/' + name]: inCluster.openshiftStateMetrics[name] for name in std.objectFields(inCluster.openshiftStateMetrics) } +
-  { ['prometheus-k8s/' + name]: inCluster.prometheus[name] for name in std.objectFields(inCluster.prometheus) } +
-  { ['prometheus-operator/' + name]: inCluster.prometheusOperator[name] for name in std.objectFields(inCluster.prometheusOperator) } +
-  { ['prometheus-operator-user-workload/' + name]: userWorkload.prometheusOperator[name] for name in std.objectFields(userWorkload.prometheusOperator) } +
-  { ['prometheus-user-workload/' + name]: userWorkload.prometheus[name] for name in std.objectFields(userWorkload.prometheus) } +
-  { ['prometheus-adapter/' + name]: inCluster.prometheusAdapter[name] for name in std.objectFields(inCluster.prometheusAdapter) } +
-  // needs to be removed once remote-write is allowed for sending telemetry
-  { ['telemeter-client/' + name]: inCluster.telemeterClient[name] for name in std.objectFields(inCluster.telemeterClient) } +
-  { ['thanos-querier/' + name]: inCluster.thanosQuerier[name] for name in std.objectFields(inCluster.thanosQuerier) } +
-  { ['thanos-ruler/' + name]: inCluster.thanosRuler[name] for name in std.objectFields(inCluster.thanosRuler) } +
-  { ['control-plane/' + name]: inCluster.controlPlane[name] for name in std.objectFields(inCluster.controlPlane) } +
-  { ['manifests/' + name]: inCluster.manifests[name] for name in std.objectFields(inCluster.manifests) } +
-  {}
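Both the removed and the added list are built with the same Jsonnet idiom: an object comprehension over std.objectFields() re-keys every manifest of a component under a 'component/' prefix, and the per-component maps are then merged with '+'. Below is a minimal, self-contained sketch of that idiom; the local alertmanager object and its two fields are made up for illustration, whereas the real inCluster.alertmanager carries one field per generated manifest.

// A stand-in component object with two made-up manifests, used only to
// illustrate the comprehension pattern from the hunks above.
local alertmanager = {
  serviceAccount: { kind: 'ServiceAccount', metadata: { name: 'alertmanager-main' } },
  service: { kind: 'Service', metadata: { name: 'alertmanager-main' } },
};

// Re-key every field under an 'alertmanager/' prefix, exactly like each
// expression in the diff.
{
  ['alertmanager/' + name]: alertmanager[name]
  for name in std.objectFields(alertmanager)
}

jsonnet evaluates this to an object with the keys 'alertmanager/service' and 'alertmanager/serviceAccount'; merging several such objects with '+', as both hunks do, produces one flat map keyed by 'component/manifest'.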
+  // When the TLS certificate used for authentication gets rotated, Prometheus
+  // doesn't pick up the new certificate until the connection to the target is
+  // re-established. Because Prometheus uses keep-alive HTTP connections, the
+  // consequence is that the scrapes start failing after about 1 day and the
+  // TargetDown alert fires. To resolve the alert, the cluster admin has to
+  // restart the pods being reported as down.
+  //
+  // To work around the issue (and until Prometheus properly handles certificate
+  // rotation), patch the service monitors in the openshift-monitoring and
+  // openshift-user-workload-monitoring namespaces with fall-back authentication.
+  //
+  // TODO(simonpasquier): once Prometheus issue #9512 is fixed downstream,
+  // replace addBearerTokenToServiceMonitors() by
+  // removeBearerTokenFromServiceMonitors() to ensure that all service monitors
+  // use only TLS for authentication.
+  addBearerTokenToServiceMonitors(
+    { ['alertmanager/' + name]: inCluster.alertmanager[name] for name in std.objectFields(inCluster.alertmanager) } +
+    { ['cluster-monitoring-operator/' + name]: inCluster.clusterMonitoringOperator[name] for name in std.objectFields(inCluster.clusterMonitoringOperator) } +
+    { ['grafana/' + name]: inCluster.grafana[name] for name in std.objectFields(inCluster.grafana) } +
+    { ['kube-state-metrics/' + name]: inCluster.kubeStateMetrics[name] for name in std.objectFields(inCluster.kubeStateMetrics) } +
+    { ['node-exporter/' + name]: inCluster.nodeExporter[name] for name in std.objectFields(inCluster.nodeExporter) } +
+    { ['openshift-state-metrics/' + name]: inCluster.openshiftStateMetrics[name] for name in std.objectFields(inCluster.openshiftStateMetrics) } +
+    { ['prometheus-k8s/' + name]: inCluster.prometheus[name] for name in std.objectFields(inCluster.prometheus) } +
+    { ['prometheus-operator/' + name]: inCluster.prometheusOperator[name] for name in std.objectFields(inCluster.prometheusOperator) } +
+    { ['prometheus-operator-user-workload/' + name]: userWorkload.prometheusOperator[name] for name in std.objectFields(userWorkload.prometheusOperator) } +
+    { ['prometheus-user-workload/' + name]: userWorkload.prometheus[name] for name in std.objectFields(userWorkload.prometheus) } +
+    { ['prometheus-adapter/' + name]: inCluster.prometheusAdapter[name] for name in std.objectFields(inCluster.prometheusAdapter) } +
+    // needs to be removed once remote-write is allowed for sending telemetry
+    { ['telemeter-client/' + name]: inCluster.telemeterClient[name] for name in std.objectFields(inCluster.telemeterClient) } +
+    { ['thanos-querier/' + name]: inCluster.thanosQuerier[name] for name in std.objectFields(inCluster.thanosQuerier) } +
+    { ['thanos-ruler/' + name]: inCluster.thanosRuler[name] for name in std.objectFields(inCluster.thanosRuler) } +
+    { ['control-plane/' + name]: inCluster.controlPlane[name] for name in std.objectFields(inCluster.controlPlane) } +
+    { ['manifests/' + name]: inCluster.manifests[name] for name in std.objectFields(inCluster.manifests) } +
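The body of addBearerTokenToServiceMonitors() is not part of this hunk, so the following is only a hypothetical sketch of the kind of patch the comment describes, not the repository's actual implementation. The traversal, the choice to set spec.endpoints[].bearerTokenFile (an existing ServiceMonitor endpoint field), and the use of the default in-pod service account token path are all assumptions made for illustration.

// Hypothetical sketch only: walk a 'component/manifest' map and, for every
// ServiceMonitor, add the service account's bearer token as a fall-back
// credential on each scrape endpoint, leaving other manifests untouched.
local addBearerTokenToServiceMonitors(objects) = {
  [name]:
    if objects[name].kind == 'ServiceMonitor' then
      objects[name] {
        spec+: {
          // Standard in-pod service account token mount; the real helper
          // may wire the fall-back credential differently.
          endpoints: [
            e { bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token' }
            for e in super.endpoints
          ],
        },
      }
    else
      objects[name]
  for name in std.objectFields(objects)
};

// Example input: one made-up ServiceMonitor and one unrelated manifest.
addBearerTokenToServiceMonitors({
  'alertmanager/serviceMonitor': {
    apiVersion: 'monitoring.coreos.com/v1',
    kind: 'ServiceMonitor',
    metadata: { name: 'alertmanager' },
    spec: { endpoints: [{ port: 'metrics', scheme: 'https', tlsConfig: {} }] },
  },
  'alertmanager/service': {
    kind: 'Service',
    metadata: { name: 'alertmanager-main' },
  },
})

The companion removeBearerTokenFromServiceMonitors() mentioned in the TODO would do the inverse walk, dropping the bearer token field so that scraping relies on TLS client certificates alone.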