|
| 1 | +package insights |
| 2 | + |
| 3 | +import ( |
| 4 | + "context" |
| 5 | + |
| 6 | + "github.com/openshift/insights-operator/pkg/config/configobserver" |
| 7 | + "k8s.io/apimachinery/pkg/util/intstr" |
| 8 | + "k8s.io/client-go/rest" |
| 9 | + "k8s.io/klog/v2" |
| 10 | + |
| 11 | + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" |
| 12 | + monitoringcli "github.com/prometheus-operator/prometheus-operator/pkg/client/versioned" |
| 13 | + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" |
| 14 | +) |
| 15 | + |
// Fixed identifiers used when building the Insights PrometheusRule object.
// They are never reassigned, so they are declared as constants rather than
// mutable package-level variables.
const (
	// rulesName is the name of the PrometheusRule object that is created and deleted.
	rulesName = "insights-prometheus-rules"
	// namespaceName is the namespace of the PrometheusRule object; it is also
	// attached as the "namespace" label on some of the alerts.
	namespaceName = "openshift-insights"
	// durationString is the "for" duration applied to every alert.
	durationString = "5m"
	// info is the severity label value applied to every alert.
	info = "info"

	// Names of the individual alerts.
	insightsDisabledAlert                = "InsightsDisabled"
	simpleContentAccessNotAvailableAlert = "SimpleContentAccessNotAvailable"
	insightsRecommendationActiveAlert    = "InsightsRecommendationActive"
)
| 26 | + |
| 27 | +// PrometheusRulesControllers listens to the configuration observer and |
| 28 | +// creates or removes the Insights Prometheus Rules definitions accordingly |
| 29 | +type PrometheusRulesController struct { |
| 30 | + configurator configobserver.Configurator |
| 31 | + monitoringCS monitoringcli.Interface |
| 32 | + promRulesExist bool |
| 33 | +} |
| 34 | + |
| 35 | +func NewPrometheusRulesController(configurator configobserver.Configurator, kubeConfig *rest.Config) PrometheusRulesController { |
| 36 | + monitoringCS, err := monitoringcli.NewForConfig(kubeConfig) |
| 37 | + if err != nil { |
| 38 | + klog.Warningf("Unable create monitoring client: %v", err) |
| 39 | + } |
| 40 | + return PrometheusRulesController{ |
| 41 | + configurator: configurator, |
| 42 | + monitoringCS: monitoringCS, |
| 43 | + } |
| 44 | +} |
| 45 | + |
| 46 | +// Start starts listening to the configuration observer |
| 47 | +func (p *PrometheusRulesController) Start(ctx context.Context) { |
| 48 | + configCh, cancel := p.configurator.ConfigChanged() |
| 49 | + defer cancel() |
| 50 | + |
| 51 | + p.checkAlertsDisabled(ctx) |
| 52 | + for { |
| 53 | + select { |
| 54 | + case <-configCh: |
| 55 | + p.checkAlertsDisabled(ctx) |
| 56 | + case <-ctx.Done(): |
| 57 | + return |
| 58 | + } |
| 59 | + } |
| 60 | +} |
| 61 | + |
| 62 | +// checkAlertsDisabled reads the actual config and either creates (if they don't exist) or removes (if they do exist) |
| 63 | +// the "insights-prometheus-rules" definition |
| 64 | +func (p *PrometheusRulesController) checkAlertsDisabled(ctx context.Context) { |
| 65 | + disableInsightsAlerts := p.configurator.Config().DisableInsightsAlerts |
| 66 | + |
| 67 | + if disableInsightsAlerts && p.promRulesExist { |
| 68 | + err := p.removeInsightsAlerts(ctx) |
| 69 | + if err != nil { |
| 70 | + klog.Errorf("Failed to remove Insights Prometheus rules definition: %v", err) |
| 71 | + return |
| 72 | + } |
| 73 | + klog.Info("Prometheus rules successfully removed") |
| 74 | + p.promRulesExist = false |
| 75 | + } |
| 76 | + |
| 77 | + if !disableInsightsAlerts && !p.promRulesExist { |
| 78 | + err := p.createInsightsAlerts(ctx) |
| 79 | + if err != nil { |
| 80 | + klog.Errorf("Failed to create Insights Prometheus rules definition: %v", err) |
| 81 | + return |
| 82 | + } |
| 83 | + klog.Info("Prometheus rules successfully created") |
| 84 | + p.promRulesExist = true |
| 85 | + } |
| 86 | +} |
| 87 | + |
| 88 | +// createInsightsAlerts creates Insights Prometheus Rules definitions (including alerts) |
| 89 | +func (p *PrometheusRulesController) createInsightsAlerts(ctx context.Context) error { |
| 90 | + pr := &monitoringv1.PrometheusRule{ |
| 91 | + ObjectMeta: metav1.ObjectMeta{ |
| 92 | + Name: rulesName, |
| 93 | + Namespace: namespaceName, |
| 94 | + }, |
| 95 | + Spec: monitoringv1.PrometheusRuleSpec{ |
| 96 | + Groups: []monitoringv1.RuleGroup{ |
| 97 | + { |
| 98 | + Name: "insights", |
| 99 | + Rules: []monitoringv1.Rule{ |
| 100 | + { |
| 101 | + Alert: insightsDisabledAlert, |
| 102 | + Expr: intstr.FromString("max without (job, pod, service, instance) (cluster_operator_conditions{name=\"insights\", condition=\"Disabled\"} == 1)"), |
| 103 | + For: monitoringv1.Duration(durationString), |
| 104 | + Labels: map[string]string{ |
| 105 | + "severity": info, |
| 106 | + "namespace": namespaceName, |
| 107 | + }, |
| 108 | + Annotations: map[string]string{ |
| 109 | + "description": "Insights operator is disabled. In order to enable Insights and benefit from recommendations specific to your cluster, please follow steps listed in the documentation: https://docs.openshift.com/container-platform/latest/support/remote_health_monitoring/enabling-remote-health-reporting.html", |
| 110 | + "summary": "Insights operator is disabled.", |
| 111 | + }, |
| 112 | + }, |
| 113 | + { |
| 114 | + Alert: simpleContentAccessNotAvailableAlert, |
| 115 | + Expr: intstr.FromString(" max without (job, pod, service, instance) (max_over_time(cluster_operator_conditions{name=\"insights\", condition=\"SCAAvailable\", reason=\"NotFound\"}[5m]) == 0)"), |
| 116 | + For: monitoringv1.Duration(durationString), |
| 117 | + Labels: map[string]string{ |
| 118 | + "severity": info, |
| 119 | + "namespace": namespaceName, |
| 120 | + }, |
| 121 | + Annotations: map[string]string{ |
| 122 | + "description": "Simple content access (SCA) is not enabled. Once enabled, Insights Operator can automatically import the SCA certificates from Red Hat OpenShift Cluster Manager making it easier to use the content provided by your Red Hat subscriptions when creating container images. See https://docs.openshift.com/container-platform/latest/cicd/builds/running-entitled-builds.html for more information.", |
| 123 | + "summary": "Simple content access certificates are not available.", |
| 124 | + }, |
| 125 | + }, |
| 126 | + { |
| 127 | + Alert: insightsRecommendationActiveAlert, |
| 128 | + Expr: intstr.FromString("insights_recommendation_active == 1"), |
| 129 | + For: monitoringv1.Duration(durationString), |
| 130 | + Labels: map[string]string{ |
| 131 | + "severity": info, |
| 132 | + }, |
| 133 | + Annotations: map[string]string{ |
| 134 | + "description": "Insights recommendation \"{{ $labels.description }}\" with total risk \"{{ $labels.total_risk }}\" was detected on the cluster. More information is available at {{ $labels.info_link }}.", |
| 135 | + "summary": "An Insights recommendation is active for this cluster.", |
| 136 | + }, |
| 137 | + }, |
| 138 | + }, |
| 139 | + }, |
| 140 | + }, |
| 141 | + }, |
| 142 | + } |
| 143 | + |
| 144 | + _, err := p.monitoringCS.MonitoringV1().PrometheusRules(namespaceName).Create(ctx, pr, metav1.CreateOptions{}) |
| 145 | + return err |
| 146 | +} |
| 147 | + |
| 148 | +// removeInsightsAlerts removes the "insights-prometheus-rules" definition |
| 149 | +func (p *PrometheusRulesController) removeInsightsAlerts(ctx context.Context) error { |
| 150 | + return p.monitoringCS.MonitoringV1(). |
| 151 | + PrometheusRules(namespaceName). |
| 152 | + Delete(ctx, rulesName, metav1.DeleteOptions{}) |
| 153 | +} |
0 commit comments