Skip to content

Commit 0f38b40

Browse files
authored
OCPBUGS-13915: create Prometheus rules programmatically according the config option (#807)
* OCPBUGS-13915: create Prometheus rules programmatically according the config option * update if-else when checking the state
1 parent d88e43c commit 0f38b40

File tree

404 files changed

+38832
-2492
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

404 files changed

+38832
-2492
lines changed

go.mod

+19-15
Original file line numberDiff line numberDiff line change
@@ -4,31 +4,34 @@ go 1.18
44

55
require (
66
github.com/blang/semver/v4 v4.0.0
7-
github.com/evanphx/json-patch v4.12.0+incompatible
7+
github.com/evanphx/json-patch v5.6.0+incompatible
88
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da
99
github.com/openshift/api v0.0.0-20230509100629-894b49f57a15
1010
github.com/openshift/build-machinery-go v0.0.0-20220913142420-e25cf57ea46d
1111
github.com/openshift/client-go v0.0.0-20230503144108-75015d2347cb
1212
github.com/openshift/installer v0.9.0-master.0.20191219195746-103098955ced
1313
github.com/openshift/library-go v0.0.0-20230510144506-e749b54aff20
14+
github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.65.2
15+
github.com/prometheus-operator/prometheus-operator/pkg/client v0.65.2
1416
github.com/prometheus/client_golang v1.14.0
1517
github.com/spf13/cobra v1.6.0
1618
github.com/spf13/pflag v1.0.5
1719
github.com/stretchr/testify v1.8.1
1820
github.com/xeipuuv/gojsonschema v1.2.0
19-
golang.org/x/net v0.8.0
20-
golang.org/x/time v0.0.0-20220210224613-90d013bbcef8
21+
golang.org/x/net v0.9.0
22+
golang.org/x/time v0.3.0
2123
k8s.io/api v0.27.1
2224
k8s.io/apiextensions-apiserver v0.27.1
2325
k8s.io/apimachinery v0.27.1
2426
k8s.io/client-go v0.27.1
2527
k8s.io/component-base v0.27.1
26-
k8s.io/klog/v2 v2.90.1
28+
k8s.io/klog/v2 v2.100.1
2729
k8s.io/utils v0.0.0-20230406110748-d93618cff8a2
2830
sigs.k8s.io/yaml v1.3.0
2931
)
3032

3133
require (
34+
cloud.google.com/go/compute/metadata v0.2.3 // indirect
3235
github.com/NYTimes/gziphandler v1.1.1 // indirect
3336
github.com/antlr/antlr4/runtime/Go/antlr v1.4.10 // indirect
3437
github.com/asaskevich/govalidator v0.0.0-20190424111038-f61b66f89f4a // indirect
@@ -39,19 +42,19 @@ require (
3942
github.com/coreos/go-semver v0.3.0 // indirect
4043
github.com/coreos/go-systemd/v22 v22.4.0 // indirect
4144
github.com/davecgh/go-spew v1.1.1 // indirect
42-
github.com/emicklei/go-restful/v3 v3.9.0 // indirect
45+
github.com/emicklei/go-restful/v3 v3.10.2 // indirect
4346
github.com/felixge/httpsnoop v1.0.3 // indirect
4447
github.com/fsnotify/fsnotify v1.6.0 // indirect
4548
github.com/ghodss/yaml v1.0.0 // indirect
46-
github.com/go-logr/logr v1.2.3 // indirect
49+
github.com/go-logr/logr v1.2.4 // indirect
4750
github.com/go-logr/stdr v1.2.2 // indirect
4851
github.com/go-openapi/jsonpointer v0.19.6 // indirect
49-
github.com/go-openapi/jsonreference v0.20.1 // indirect
52+
github.com/go-openapi/jsonreference v0.20.2 // indirect
5053
github.com/go-openapi/swag v0.22.3 // indirect
5154
github.com/gogo/protobuf v1.3.2 // indirect
5255
github.com/golang/protobuf v1.5.3 // indirect
5356
github.com/google/cel-go v0.12.6 // indirect
54-
github.com/google/gnostic v0.5.7-v3refs // indirect
57+
github.com/google/gnostic v0.6.9 // indirect
5558
github.com/google/go-cmp v0.5.9 // indirect
5659
github.com/google/gofuzz v1.2.0 // indirect
5760
github.com/google/uuid v1.3.0 // indirect
@@ -94,26 +97,27 @@ require (
9497
go.opentelemetry.io/proto/otlp v0.19.0 // indirect
9598
go.uber.org/atomic v1.7.0 // indirect
9699
go.uber.org/multierr v1.6.0 // indirect
97-
go.uber.org/zap v1.19.0 // indirect
100+
go.uber.org/zap v1.24.0 // indirect
98101
golang.org/x/crypto v0.1.0 // indirect
99-
golang.org/x/oauth2 v0.0.0-20220411215720-9780585627b5 // indirect
102+
golang.org/x/oauth2 v0.7.0 // indirect
100103
golang.org/x/sync v0.1.0 // indirect
101-
golang.org/x/sys v0.6.0 // indirect
102-
golang.org/x/term v0.6.0 // indirect
103-
golang.org/x/text v0.8.0 // indirect
104+
golang.org/x/sys v0.7.0 // indirect
105+
golang.org/x/term v0.7.0 // indirect
106+
golang.org/x/text v0.9.0 // indirect
104107
google.golang.org/appengine v1.6.7 // indirect
105108
google.golang.org/genproto v0.0.0-20220502173005-c8bf987b8c21 // indirect
106109
google.golang.org/grpc v1.51.0 // indirect
107-
google.golang.org/protobuf v1.28.1 // indirect
110+
google.golang.org/protobuf v1.30.0 // indirect
108111
gopkg.in/inf.v0 v0.9.1 // indirect
109112
gopkg.in/natefinch/lumberjack.v2 v2.0.0 // indirect
110113
gopkg.in/yaml.v2 v2.4.0 // indirect
111114
gopkg.in/yaml.v3 v3.0.1 // indirect
112115
k8s.io/apiserver v0.27.1 // indirect
113116
k8s.io/kms v0.27.1 // indirect
114117
k8s.io/kube-aggregator v0.27.1 // indirect
115-
k8s.io/kube-openapi v0.0.0-20230308215209-15aac26d736a // indirect
118+
k8s.io/kube-openapi v0.0.0-20230501164219-8b0f38b5fd1f // indirect
116119
sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.1.1 // indirect
120+
sigs.k8s.io/controller-runtime v0.14.6 // indirect
117121
sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect
118122
sigs.k8s.io/kube-storage-version-migrator v0.0.4 // indirect
119123
sigs.k8s.io/structured-merge-diff/v4 v4.2.3 // indirect

go.sum

+43-35
Large diffs are not rendered by default.

manifests/03-clusterrole.yaml

+9
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,15 @@ rules:
9494
- namespaces
9595
verbs:
9696
- get
97+
- apiGroups:
98+
- "monitoring.coreos.com"
99+
resources:
100+
- prometheusrules
101+
verbs:
102+
- create
103+
- get
104+
- list
105+
- delete
97106
---
98107
apiVersion: rbac.authorization.k8s.io/v1
99108
kind: RoleBinding

manifests/08-prometheus_rule.yaml

-43
This file was deleted.

pkg/controller/operator.go

+4
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ import (
2929
"github.com/openshift/insights-operator/pkg/controller/periodic"
3030
"github.com/openshift/insights-operator/pkg/controller/status"
3131
"github.com/openshift/insights-operator/pkg/gather"
32+
"github.com/openshift/insights-operator/pkg/insights"
3233
"github.com/openshift/insights-operator/pkg/insights/insightsclient"
3334
"github.com/openshift/insights-operator/pkg/insights/insightsreport"
3435
"github.com/openshift/insights-operator/pkg/insights/insightsuploader"
@@ -241,6 +242,9 @@ func (s *Operator) Run(ctx context.Context, controller *controllercmd.Controller
241242
statusReporter.AddSources(clusterTransferController)
242243
go clusterTransferController.Run()
243244

245+
promRulesController := insights.NewPrometheusRulesController(secretConfigObserver, controller.KubeConfig)
246+
go promRulesController.Start(ctx)
247+
244248
klog.Warning("started")
245249

246250
<-ctx.Done()

pkg/insights/prometheus_rules.go

+153
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
package insights
2+
3+
import (
4+
"context"
5+
6+
"github.com/openshift/insights-operator/pkg/config/configobserver"
7+
"k8s.io/apimachinery/pkg/util/intstr"
8+
"k8s.io/client-go/rest"
9+
"k8s.io/klog/v2"
10+
11+
monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
12+
monitoringcli "github.com/prometheus-operator/prometheus-operator/pkg/client/versioned"
13+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
14+
)
15+
16+
// Fixed identifiers and values used when building the Insights PrometheusRule.
// They are constants because nothing in this file reassigns them.
const (
	// rulesName is the name of the PrometheusRule object managed by the controller.
	rulesName = "insights-prometheus-rules"
	// namespaceName is the namespace the PrometheusRule object is created in and
	// the value of the "namespace" label attached to some alerts.
	namespaceName = "openshift-insights"
	// durationString is the "for" duration applied to every alert.
	durationString = "5m"
	// info is the severity label value applied to every alert.
	info = "info"

	// Names of the individual alerts.
	insightsDisabledAlert                = "InsightsDisabled"
	simpleContentAccessNotAvailableAlert = "SimpleContentAccessNotAvailable"
	insightsRecommendationActiveAlert    = "InsightsRecommendationActive"
)
26+
27+
// PrometheusRulesControllers listens to the configuration observer and
28+
// creates or removes the Insights Prometheus Rules definitions accordingly
29+
type PrometheusRulesController struct {
30+
configurator configobserver.Configurator
31+
monitoringCS monitoringcli.Interface
32+
promRulesExist bool
33+
}
34+
35+
func NewPrometheusRulesController(configurator configobserver.Configurator, kubeConfig *rest.Config) PrometheusRulesController {
36+
monitoringCS, err := monitoringcli.NewForConfig(kubeConfig)
37+
if err != nil {
38+
klog.Warningf("Unable create monitoring client: %v", err)
39+
}
40+
return PrometheusRulesController{
41+
configurator: configurator,
42+
monitoringCS: monitoringCS,
43+
}
44+
}
45+
46+
// Start starts listening to the configuration observer
47+
func (p *PrometheusRulesController) Start(ctx context.Context) {
48+
configCh, cancel := p.configurator.ConfigChanged()
49+
defer cancel()
50+
51+
p.checkAlertsDisabled(ctx)
52+
for {
53+
select {
54+
case <-configCh:
55+
p.checkAlertsDisabled(ctx)
56+
case <-ctx.Done():
57+
return
58+
}
59+
}
60+
}
61+
62+
// checkAlertsDisabled reads the actual config and either creates (if they don't exist) or removes (if they do exist)
63+
// the "insights-prometheus-rules" definition
64+
func (p *PrometheusRulesController) checkAlertsDisabled(ctx context.Context) {
65+
disableInsightsAlerts := p.configurator.Config().DisableInsightsAlerts
66+
67+
if disableInsightsAlerts && p.promRulesExist {
68+
err := p.removeInsightsAlerts(ctx)
69+
if err != nil {
70+
klog.Errorf("Failed to remove Insights Prometheus rules definition: %v", err)
71+
return
72+
}
73+
klog.Info("Prometheus rules successfully removed")
74+
p.promRulesExist = false
75+
}
76+
77+
if !disableInsightsAlerts && !p.promRulesExist {
78+
err := p.createInsightsAlerts(ctx)
79+
if err != nil {
80+
klog.Errorf("Failed to create Insights Prometheus rules definition: %v", err)
81+
return
82+
}
83+
klog.Info("Prometheus rules successfully created")
84+
p.promRulesExist = true
85+
}
86+
}
87+
88+
// createInsightsAlerts creates Insights Prometheus Rules definitions (including alerts)
89+
func (p *PrometheusRulesController) createInsightsAlerts(ctx context.Context) error {
90+
pr := &monitoringv1.PrometheusRule{
91+
ObjectMeta: metav1.ObjectMeta{
92+
Name: rulesName,
93+
Namespace: namespaceName,
94+
},
95+
Spec: monitoringv1.PrometheusRuleSpec{
96+
Groups: []monitoringv1.RuleGroup{
97+
{
98+
Name: "insights",
99+
Rules: []monitoringv1.Rule{
100+
{
101+
Alert: insightsDisabledAlert,
102+
Expr: intstr.FromString("max without (job, pod, service, instance) (cluster_operator_conditions{name=\"insights\", condition=\"Disabled\"} == 1)"),
103+
For: monitoringv1.Duration(durationString),
104+
Labels: map[string]string{
105+
"severity": info,
106+
"namespace": namespaceName,
107+
},
108+
Annotations: map[string]string{
109+
"description": "Insights operator is disabled. In order to enable Insights and benefit from recommendations specific to your cluster, please follow steps listed in the documentation: https://docs.openshift.com/container-platform/latest/support/remote_health_monitoring/enabling-remote-health-reporting.html",
110+
"summary": "Insights operator is disabled.",
111+
},
112+
},
113+
{
114+
Alert: simpleContentAccessNotAvailableAlert,
115+
Expr: intstr.FromString(" max without (job, pod, service, instance) (max_over_time(cluster_operator_conditions{name=\"insights\", condition=\"SCAAvailable\", reason=\"NotFound\"}[5m]) == 0)"),
116+
For: monitoringv1.Duration(durationString),
117+
Labels: map[string]string{
118+
"severity": info,
119+
"namespace": namespaceName,
120+
},
121+
Annotations: map[string]string{
122+
"description": "Simple content access (SCA) is not enabled. Once enabled, Insights Operator can automatically import the SCA certificates from Red Hat OpenShift Cluster Manager making it easier to use the content provided by your Red Hat subscriptions when creating container images. See https://docs.openshift.com/container-platform/latest/cicd/builds/running-entitled-builds.html for more information.",
123+
"summary": "Simple content access certificates are not available.",
124+
},
125+
},
126+
{
127+
Alert: insightsRecommendationActiveAlert,
128+
Expr: intstr.FromString("insights_recommendation_active == 1"),
129+
For: monitoringv1.Duration(durationString),
130+
Labels: map[string]string{
131+
"severity": info,
132+
},
133+
Annotations: map[string]string{
134+
"description": "Insights recommendation \"{{ $labels.description }}\" with total risk \"{{ $labels.total_risk }}\" was detected on the cluster. More information is available at {{ $labels.info_link }}.",
135+
"summary": "An Insights recommendation is active for this cluster.",
136+
},
137+
},
138+
},
139+
},
140+
},
141+
},
142+
}
143+
144+
_, err := p.monitoringCS.MonitoringV1().PrometheusRules(namespaceName).Create(ctx, pr, metav1.CreateOptions{})
145+
return err
146+
}
147+
148+
// removeInsightsAlerts removes the "insights-prometheus-rules" definition
149+
func (p *PrometheusRulesController) removeInsightsAlerts(ctx context.Context) error {
150+
return p.monitoringCS.MonitoringV1().
151+
PrometheusRules(namespaceName).
152+
Delete(ctx, rulesName, metav1.DeleteOptions{})
153+
}

0 commit comments

Comments
 (0)