From e97ecb8b3685e3179297eb244e3e6979afc65bbe Mon Sep 17 00:00:00 2001 From: Brad Ison Date: Mon, 5 Jul 2021 12:06:47 +0200 Subject: [PATCH 1/4] operator: Allow disabling Grafana deployment This adds an option to the CMO ConfigMap that allows disabling the Grafana deployment. The actual removal of deployed Grafana resources is not yet implemented. --- assets/prometheus-k8s/prometheus.yaml | 4 -- assets/thanos-querier/deployment.yaml | 6 -- jsonnet/prometheus.libsonnet | 22 +++++-- jsonnet/thanos-querier.libsonnet | 31 +++++---- pkg/manifests/config.go | 11 ++++ pkg/manifests/config_test.go | 43 +++++++++++++ pkg/manifests/manifests.go | 91 ++++++++++++++++++++++++--- pkg/operator/operator.go | 6 +- pkg/tasks/configsharing.go | 24 ++++--- pkg/tasks/grafana.go | 18 +++++- pkg/tasks/prometheus.go | 49 ++++++++------- pkg/tasks/thanos_querier.go | 51 ++++++++------- 12 files changed, 267 insertions(+), 89 deletions(-) diff --git a/assets/prometheus-k8s/prometheus.yaml b/assets/prometheus-k8s/prometheus.yaml index 0e9e380b0a..3aa52c78c3 100644 --- a/assets/prometheus-k8s/prometheus.yaml +++ b/assets/prometheus-k8s/prometheus.yaml @@ -45,7 +45,6 @@ spec: - -http-address= - -email-domain=* - -upstream=http://localhost:9090 - - -htpasswd-file=/etc/proxy/htpasswd/auth - -openshift-service-account=prometheus-k8s - '-openshift-sar={"resource": "namespaces", "verb": "get"}' - '-openshift-delegate-urls={"/": {"resource": "namespaces", "verb": "get"}}' @@ -77,8 +76,6 @@ spec: name: secret-prometheus-k8s-tls - mountPath: /etc/proxy/secrets name: secret-prometheus-k8s-proxy - - mountPath: /etc/proxy/htpasswd - name: secret-prometheus-k8s-htpasswd - args: - --secure-listen-address=0.0.0.0:9092 - --upstream=http://127.0.0.1:9095 @@ -199,7 +196,6 @@ spec: - kube-etcd-client-certs - prometheus-k8s-tls - prometheus-k8s-proxy - - prometheus-k8s-htpasswd - prometheus-k8s-thanos-sidecar-tls - kube-rbac-proxy securityContext: diff --git a/assets/thanos-querier/deployment.yaml b/assets/thanos-querier/deployment.yaml index 531f36d1d1..91f3617277 100644 --- a/assets/thanos-querier/deployment.yaml +++ b/assets/thanos-querier/deployment.yaml @@ -94,7 +94,6 @@ spec: - -http-address= - -email-domain=* - -upstream=http://localhost:9090 - - -htpasswd-file=/etc/proxy/htpasswd/auth - -openshift-service-account=thanos-querier - '-openshift-sar={"resource": "namespaces", "verb": "get"}' - '-openshift-delegate-urls={"/": {"resource": "namespaces", "verb": "get"}}' @@ -126,8 +125,6 @@ spec: name: secret-thanos-querier-tls - mountPath: /etc/proxy/secrets name: secret-thanos-querier-oauth-cookie - - mountPath: /etc/proxy/htpasswd - name: secret-thanos-querier-oauth-htpasswd - args: - --secure-listen-address=0.0.0.0:9092 - --upstream=http://127.0.0.1:9095 @@ -199,9 +196,6 @@ spec: - name: secret-thanos-querier-oauth-cookie secret: secretName: thanos-querier-oauth-cookie - - name: secret-thanos-querier-oauth-htpasswd - secret: - secretName: thanos-querier-oauth-htpasswd - name: secret-thanos-querier-kube-rbac-proxy secret: secretName: thanos-querier-kube-rbac-proxy diff --git a/jsonnet/prometheus.libsonnet b/jsonnet/prometheus.libsonnet index b190fc85f8..65139e22ba 100644 --- a/jsonnet/prometheus.libsonnet +++ b/jsonnet/prometheus.libsonnet @@ -272,6 +272,13 @@ function(params) // These patches inject the oauth proxy as a sidecar and configures it with // TLS. Additionally as the Alertmanager is protected with TLS, authN and // authZ it requires some additonal configuration. + // + // Note that Grafana is enabled by default, but may be explicitly disabled + // by the user. We need to inject an htpasswd file for the oauth-proxy when + // it is enabled, so by default the operator also adds a few things at + // runtime: a volume and volume-mount for the secret, and an argument to the + // proxy container pointing to the mounted htpasswd file. If Grafana is + // disabled, these things are not injected. prometheus+: { spec+: { alerting+: { @@ -301,10 +308,11 @@ function(params) runAsUser: 65534, }, secrets+: [ + // NOTE: The following is injected at runtime if Grafana is enabled: + // 'prometheus-k8s-htpasswd' 'kube-etcd-client-certs', //TODO(paulfantom): move it to etcd addon 'prometheus-k8s-tls', 'prometheus-k8s-proxy', - 'prometheus-k8s-htpasswd', 'prometheus-k8s-thanos-sidecar-tls', 'kube-rbac-proxy', ], @@ -348,12 +356,13 @@ function(params) }, ], args: [ + // NOTE: The following is injected at runtime if Grafana is enabled: + // '-htpasswd-file=/etc/proxy/htpasswd/auth' '-provider=openshift', '-https-address=:9091', '-http-address=', '-email-domain=*', '-upstream=http://localhost:9090', - '-htpasswd-file=/etc/proxy/htpasswd/auth', '-openshift-service-account=prometheus-k8s', '-openshift-sar={"resource": "namespaces", "verb": "get"}', '-openshift-delegate-urls={"/": {"resource": "namespaces", "verb": "get"}}', @@ -366,6 +375,11 @@ function(params) ], terminationMessagePolicy: 'FallbackToLogsOnError', volumeMounts: [ + // NOTE: The following is injected at runtime if Grafana is enabled: + // { + // mountPath: '/etc/proxy/htpasswd', + // name: 'secret-prometheus-k8s-htpasswd', + // }, { mountPath: '/etc/tls/private', name: 'secret-prometheus-k8s-tls', @@ -374,10 +388,6 @@ function(params) mountPath: '/etc/proxy/secrets', name: 'secret-prometheus-k8s-proxy', }, - { - mountPath: '/etc/proxy/htpasswd', - name: 'secret-prometheus-k8s-htpasswd', - }, ], }, { diff --git a/jsonnet/thanos-querier.libsonnet b/jsonnet/thanos-querier.libsonnet index 66b053aee9..728e88cf65 100644 --- a/jsonnet/thanos-querier.libsonnet +++ b/jsonnet/thanos-querier.libsonnet @@ -265,6 +265,12 @@ function(params) }, }, + // Note that Grafana is enabled by default, but may be explicitly disabled + // by the user. We need to inject an htpasswd file for the oauth-proxy when + // it is enabled, so by default the operator also adds a few things at + // runtime: a volume and volume-mount for the secret, and an argument to the + // proxy container pointing to the mounted htpasswd file. If Grafana is + // disabled, these things are not injected. deployment+: { spec+: { strategy+: { @@ -291,6 +297,13 @@ function(params) }, }, volumes+: [ + // NOTE: If Grafana is enabled, the following is injected at runtime: + // { + // name: 'secret-thanos-querier-oauth-htpasswd', + // secret: { + // secretName: 'thanos-querier-oauth-htpasswd', + // }, + // }, { name: 'secret-thanos-querier-tls', secret: { @@ -303,12 +316,6 @@ function(params) secretName: 'thanos-querier-oauth-cookie', }, }, - { - name: 'secret-thanos-querier-oauth-htpasswd', - secret: { - secretName: 'thanos-querier-oauth-htpasswd', - }, - }, { name: 'secret-thanos-querier-kube-rbac-proxy', secret: { @@ -394,12 +401,13 @@ function(params) { name: 'NO_PROXY', value: '' }, ], args: [ + // NOTE: The following is injected at runtime if Grafana is enabled: + // '-htpasswd-file=/etc/proxy/htpasswd/auth' '-provider=openshift', '-https-address=:9091', '-http-address=', '-email-domain=*', '-upstream=http://localhost:9090', - '-htpasswd-file=/etc/proxy/htpasswd/auth', '-openshift-service-account=thanos-querier', '-openshift-sar={"resource": "namespaces", "verb": "get"}', '-openshift-delegate-urls={"/": {"resource": "namespaces", "verb": "get"}}', @@ -412,6 +420,11 @@ function(params) ], terminationMessagePolicy: 'FallbackToLogsOnError', volumeMounts: [ + // NOTE: The following is injected at runtime if Grafana is enabled: + // { + // mountPath: '/etc/proxy/htpasswd', + // name: 'secret-thanos-querier-oauth-htpasswd', + // }, { mountPath: '/etc/tls/private', name: 'secret-thanos-querier-tls', @@ -420,10 +433,6 @@ function(params) mountPath: '/etc/proxy/secrets', name: 'secret-thanos-querier-oauth-cookie', }, - { - mountPath: '/etc/proxy/htpasswd', - name: 'secret-thanos-querier-oauth-htpasswd', - }, ], }, { diff --git a/pkg/manifests/config.go b/pkg/manifests/config.go index 2812c3aade..3ee4376e44 100644 --- a/pkg/manifests/config.go +++ b/pkg/manifests/config.go @@ -151,10 +151,21 @@ type ThanosQuerierConfig struct { } type GrafanaConfig struct { + Enabled *bool `json:"enabled"` NodeSelector map[string]string `json:"nodeSelector"` Tolerations []v1.Toleration `json:"tolerations"` } +// IsEnabled returns the underlying value of the `Enabled` boolean pointer. It +// defaults to TRUE if the pointer is nil because Grafana should be enabled by +// default. +func (g *GrafanaConfig) IsEnabled() bool { + if g.Enabled == nil { + return true + } + return *g.Enabled +} + type KubeStateMetricsConfig struct { NodeSelector map[string]string `json:"nodeSelector"` Tolerations []v1.Toleration `json:"tolerations"` diff --git a/pkg/manifests/config_test.go b/pkg/manifests/config_test.go index 07eb14c02d..a89781bf89 100644 --- a/pkg/manifests/config_test.go +++ b/pkg/manifests/config_test.go @@ -234,3 +234,46 @@ func TestHttpProxyConfig(t *testing.T) { } } } + +func TestGrafanaDefaultsToEnabled(t *testing.T) { + for _, tt := range []struct { + name string + config string + expectEnabled bool + }{ + { + name: "empty config", + config: "", + expectEnabled: true, + }, + { + name: "empty grafana config", + config: `{"grafana":{}}`, + expectEnabled: true, + }, + { + name: "grafana explicitly enabled", + config: `{"grafana":{"enabled": true}}`, + expectEnabled: true, + }, + { + name: "grafana disabled", + config: `{"grafana":{"enabled": false}}`, + expectEnabled: false, + }, + } { + t.Run(tt.name, func(t *testing.T) { + c, err := NewConfigFromString(tt.config) + if err != nil { + t.Fatal(err) + } + + enabled := c.ClusterMonitoringConfiguration.GrafanaConfig.IsEnabled() + + if enabled != tt.expectEnabled { + t.Fatalf("GrafanaConfig.IsEnabled() returned %t, expected %t", + enabled, tt.expectEnabled) + } + }) + } +} diff --git a/pkg/manifests/manifests.go b/pkg/manifests/manifests.go index bb077cb2aa..f5a9ed61c0 100644 --- a/pkg/manifests/manifests.go +++ b/pkg/manifests/manifests.go @@ -49,6 +49,8 @@ import ( const ( configManagedNamespace = "openshift-config-managed" sharedConfigMap = "monitoring-shared-config" + + htpasswdArg = "-htpasswd-file=/etc/proxy/htpasswd/auth" ) var ( @@ -1156,19 +1158,32 @@ func (f *Factory) ThanosQuerierRoute() (*routev1.Route, error) { } func (f *Factory) SharingConfig(promHost, amHost, grafanaHost, thanosHost *url.URL) *v1.ConfigMap { + data := map[string]string{} + + // Configmap keys need to include "public" to indicate that they are public values. + // See https://bugzilla.redhat.com/show_bug.cgi?id=1807100. + if promHost != nil { + data["prometheusPublicURL"] = promHost.String() + } + + if amHost != nil { + data["alertmanagerPublicURL"] = amHost.String() + } + + if grafanaHost != nil { + data["grafanaPublicURL"] = grafanaHost.String() + } + + if thanosHost != nil { + data["thanosPublicURL"] = thanosHost.String() + } + return &v1.ConfigMap{ ObjectMeta: metav1.ObjectMeta{ Name: sharedConfigMap, Namespace: configManagedNamespace, }, - Data: map[string]string{ - // Configmap keys need to include "public" to indicate that they are public values. - // See https://bugzilla.redhat.com/show_bug.cgi?id=1807100. - "grafanaPublicURL": grafanaHost.String(), - "prometheusPublicURL": promHost.String(), - "alertmanagerPublicURL": amHost.String(), - "thanosPublicURL": thanosHost.String(), - }, + Data: data, } } @@ -1320,6 +1335,27 @@ func (f *Factory) PrometheusK8s(host string, grpcTLS *v1.Secret, trustedCABundle f.injectProxyVariables(&p.Spec.Containers[i]) + // If Grafana is enabled, inject the necessary bits for basic-auth. + if f.config.ClusterMonitoringConfiguration.GrafanaConfig.IsEnabled() { + volumeName := "secret-prometheus-k8s-htpasswd" + secretName := "prometheus-k8s-htpasswd" + + p.Spec.Containers[i].Args = append( + p.Spec.Containers[i].Args, + htpasswdArg, + ) + + p.Spec.Containers[i].VolumeMounts = append( + p.Spec.Containers[i].VolumeMounts, + htpasswdVolumeMount(volumeName), + ) + + p.Spec.Volumes = append( + p.Spec.Volumes, + htpasswdVolume(secretName, volumeName), + ) + } + case "kube-rbac-proxy": p.Spec.Containers[i].Image = f.config.Images.KubeRbacProxy case "kube-rbac-proxy-thanos": @@ -2906,6 +2942,27 @@ func (f *Factory) ThanosQuerierDeployment(grpcTLS *v1.Secret, enableUserWorkload d.Spec.Template.Spec.Volumes = append(d.Spec.Template.Spec.Volumes, volume) } + // If Grafana is enabled, inject the necessary bits for basic-auth. + if f.config.ClusterMonitoringConfiguration.GrafanaConfig.IsEnabled() { + volumeName := "secret-thanos-querier-oauth-htpasswd" + secretName := "thanos-querier-oauth-htpasswd" + + d.Spec.Template.Spec.Containers[i].Args = append( + d.Spec.Template.Spec.Containers[i].Args, + htpasswdArg, + ) + + d.Spec.Template.Spec.Containers[i].VolumeMounts = append( + d.Spec.Template.Spec.Containers[i].VolumeMounts, + htpasswdVolumeMount(volumeName), + ) + + d.Spec.Template.Spec.Volumes = append( + d.Spec.Template.Spec.Volumes, + htpasswdVolume(secretName, volumeName), + ) + } + case "thanos-query": d.Spec.Template.Spec.Containers[i].Image = f.config.Images.Thanos @@ -3699,6 +3756,24 @@ func (f *Factory) HashSecret(secret *v1.Secret, data ...string) (*v1.Secret, err }, nil } +func htpasswdVolumeMount(name string) v1.VolumeMount { + return v1.VolumeMount{ + Name: name, + MountPath: "/etc/proxy/htpasswd", + } +} + +func htpasswdVolume(secretName, volumeName string) v1.Volume { + return v1.Volume{ + Name: volumeName, + VolumeSource: v1.VolumeSource{ + Secret: &v1.SecretVolumeSource{ + SecretName: secretName, + }, + }, + } +} + func trustedCABundleVolumeMount(name string) v1.VolumeMount { return v1.VolumeMount{ Name: name, diff --git a/pkg/operator/operator.go b/pkg/operator/operator.go index 85768e54bf..7e71d8c7a0 100644 --- a/pkg/operator/operator.go +++ b/pkg/operator/operator.go @@ -447,8 +447,8 @@ func (o *Operator) sync(key string) error { tasks.NewTaskSpec("Updating Prometheus Operator", tasks.NewPrometheusOperatorTask(o.client, factory)), tasks.NewTaskSpec("Updating user workload Prometheus Operator", tasks.NewPrometheusOperatorUserWorkloadTask(o.client, factory, config)), tasks.NewTaskSpec("Updating Cluster Monitoring Operator", tasks.NewClusterMonitoringOperatorTask(o.client, factory)), - tasks.NewTaskSpec("Updating Grafana", tasks.NewGrafanaTask(o.client, factory)), - tasks.NewTaskSpec("Updating Prometheus-k8s", tasks.NewPrometheusTask(o.client, factory)), + tasks.NewTaskSpec("Updating Grafana", tasks.NewGrafanaTask(o.client, factory, config)), + tasks.NewTaskSpec("Updating Prometheus-k8s", tasks.NewPrometheusTask(o.client, factory, config)), tasks.NewTaskSpec("Updating Prometheus-user-workload", tasks.NewPrometheusUserWorkloadTask(o.client, factory, config)), tasks.NewTaskSpec("Updating Alertmanager", tasks.NewAlertmanagerTask(o.client, factory)), tasks.NewTaskSpec("Updating node-exporter", tasks.NewNodeExporterTask(o.client, factory)), @@ -456,7 +456,7 @@ func (o *Operator) sync(key string) error { tasks.NewTaskSpec("Updating openshift-state-metrics", tasks.NewOpenShiftStateMetricsTask(o.client, factory)), tasks.NewTaskSpec("Updating prometheus-adapter", tasks.NewPrometheusAdapterTaks(o.namespace, o.client, factory)), tasks.NewTaskSpec("Updating Telemeter client", tasks.NewTelemeterClientTask(o.client, factory, config)), - tasks.NewTaskSpec("Updating configuration sharing", tasks.NewConfigSharingTask(o.client, factory)), + tasks.NewTaskSpec("Updating configuration sharing", tasks.NewConfigSharingTask(o.client, factory, config)), tasks.NewTaskSpec("Updating Thanos Querier", tasks.NewThanosQuerierTask(o.client, factory, config)), tasks.NewTaskSpec("Updating User Workload Thanos Ruler", tasks.NewThanosRulerUserWorkloadTask(o.client, factory, config)), tasks.NewTaskSpec("Updating Control Plane components", tasks.NewControlPlaneTask(o.client, factory, config)), diff --git a/pkg/tasks/configsharing.go b/pkg/tasks/configsharing.go index cdb6869110..92528c03d0 100644 --- a/pkg/tasks/configsharing.go +++ b/pkg/tasks/configsharing.go @@ -15,6 +15,8 @@ package tasks import ( + "net/url" + "github.com/openshift/cluster-monitoring-operator/pkg/client" "github.com/openshift/cluster-monitoring-operator/pkg/manifests" "github.com/pkg/errors" @@ -23,12 +25,14 @@ import ( type ConfigSharingTask struct { client *client.Client factory *manifests.Factory + config *manifests.Config } -func NewConfigSharingTask(client *client.Client, factory *manifests.Factory) *ConfigSharingTask { +func NewConfigSharingTask(client *client.Client, factory *manifests.Factory, config *manifests.Config) *ConfigSharingTask { return &ConfigSharingTask{ client: client, factory: factory, + config: config, } } @@ -53,14 +57,18 @@ func (t *ConfigSharingTask) Run() error { return errors.Wrap(err, "failed to retrieve Alertmanager host") } - grafanaRoute, err := t.factory.GrafanaRoute() - if err != nil { - return errors.Wrap(err, "initializing Grafana Route failed") - } + var grafanaURL *url.URL - grafanaURL, err := t.client.GetRouteURL(grafanaRoute) - if err != nil { - return errors.Wrap(err, "failed to retrieve Grafana host") + if t.config.ClusterMonitoringConfiguration.GrafanaConfig.IsEnabled() { + grafanaRoute, err := t.factory.GrafanaRoute() + if err != nil { + return errors.Wrap(err, "initializing Grafana Route failed") + } + + grafanaURL, err = t.client.GetRouteURL(grafanaRoute) + if err != nil { + return errors.Wrap(err, "failed to retrieve Grafana host") + } } thanosRoute, err := t.factory.ThanosQuerierRoute() diff --git a/pkg/tasks/grafana.go b/pkg/tasks/grafana.go index fd9677a5a0..a095d4c840 100644 --- a/pkg/tasks/grafana.go +++ b/pkg/tasks/grafana.go @@ -18,21 +18,32 @@ import ( "github.com/openshift/cluster-monitoring-operator/pkg/client" "github.com/openshift/cluster-monitoring-operator/pkg/manifests" "github.com/pkg/errors" + "k8s.io/klog/v2" ) type GrafanaTask struct { client *client.Client factory *manifests.Factory + config *manifests.Config } -func NewGrafanaTask(client *client.Client, factory *manifests.Factory) *GrafanaTask { +func NewGrafanaTask(client *client.Client, factory *manifests.Factory, config *manifests.Config) *GrafanaTask { return &GrafanaTask{ client: client, factory: factory, + config: config, } } func (t *GrafanaTask) Run() error { + if t.config.ClusterMonitoringConfiguration.GrafanaConfig.IsEnabled() { + return t.create() + } + + return t.destroy() +} + +func (t *GrafanaTask) create() error { cr, err := t.factory.GrafanaClusterRole() if err != nil { return errors.Wrap(err, "initializing Grafana ClusterRole failed") @@ -173,3 +184,8 @@ func (t *GrafanaTask) Run() error { err = t.client.CreateOrUpdateServiceMonitor(sm) return errors.Wrap(err, "reconciling Grafana ServiceMonitor failed") } + +func (t *GrafanaTask) destroy() error { + klog.V(2).Info("Grafana is disabled, but destroy not yet implemented.") + return nil +} diff --git a/pkg/tasks/prometheus.go b/pkg/tasks/prometheus.go index 2c5801c4d4..17585de2a8 100644 --- a/pkg/tasks/prometheus.go +++ b/pkg/tasks/prometheus.go @@ -28,12 +28,14 @@ import ( type PrometheusTask struct { client *client.Client factory *manifests.Factory + config *manifests.Config } -func NewPrometheusTask(client *client.Client, factory *manifests.Factory) *PrometheusTask { +func NewPrometheusTask(client *client.Client, factory *manifests.Factory, config *manifests.Config) *PrometheusTask { return &PrometheusTask{ client: client, factory: factory, + config: config, } } @@ -88,30 +90,35 @@ func (t *PrometheusTask) Run() error { return errors.Wrap(err, "creating Prometheus proxy Secret failed") } - gs, err := t.factory.GrafanaDatasources() - if err != nil { - return errors.Wrap(err, "initializing Grafana Datasources Secret failed") - } + // If Grafana is enabled, create the basic auth secret. + if t.config.ClusterMonitoringConfiguration.GrafanaConfig.IsEnabled() { + gs, err := t.factory.GrafanaDatasources() + if err != nil { + return errors.Wrap(err, "initializing Grafana Datasources Secret failed") + } - gs, err = t.client.WaitForSecret(gs) - if err != nil { - return errors.Wrap(err, "waiting for Grafana Datasources Secret failed") - } + gs, err = t.client.WaitForSecret(gs) + if err != nil { + return errors.Wrap(err, "waiting for Grafana Datasources Secret failed") + } - d := &manifests.GrafanaDatasources{} - err = json.Unmarshal(gs.Data["prometheus.yaml"], d) - if err != nil { - return errors.Wrap(err, "unmarshalling grafana datasource failed") - } + d := &manifests.GrafanaDatasources{} + err = json.Unmarshal(gs.Data["prometheus.yaml"], d) + if err != nil { + return errors.Wrap(err, "unmarshalling grafana datasource failed") + } - hs, err := t.factory.PrometheusK8sHtpasswdSecret(d.Datasources[0].BasicAuthPassword) - if err != nil { - return errors.Wrap(err, "initializing Prometheus htpasswd Secret failed") - } + basicAuthPassword := d.Datasources[0].BasicAuthPassword - err = t.client.CreateIfNotExistSecret(hs) - if err != nil { - return errors.Wrap(err, "creating Prometheus htpasswd Secret failed") + htpasswdSecret, err := t.factory.PrometheusK8sHtpasswdSecret(basicAuthPassword) + if err != nil { + return errors.Wrap(err, "initializing Prometheus htpasswd Secret failed") + } + + err = t.client.CreateOrUpdateSecret(htpasswdSecret) + if err != nil { + return errors.Wrap(err, "creating Prometheus htpasswd Secret failed") + } } rs, err := t.factory.PrometheusRBACProxySecret() diff --git a/pkg/tasks/thanos_querier.go b/pkg/tasks/thanos_querier.go index 1f5842b5f3..59e3ecfa42 100644 --- a/pkg/tasks/thanos_querier.go +++ b/pkg/tasks/thanos_querier.go @@ -72,30 +72,35 @@ func (t *ThanosQuerierTask) Run() error { return errors.Wrap(err, "creating Thanos Querier OAuth Cookie Secret failed") } - gs, err := t.factory.GrafanaDatasources() - if err != nil { - return errors.Wrap(err, "initializing Grafana Datasources Secret failed") - } + // If Grafana is enabled, create the basic auth secret. + if t.config.ClusterMonitoringConfiguration.GrafanaConfig.IsEnabled() { + gs, err := t.factory.GrafanaDatasources() + if err != nil { + return errors.Wrap(err, "initializing Grafana Datasources Secret failed") + } - gs, err = t.client.WaitForSecret(gs) - if err != nil { - return errors.Wrap(err, "waiting for Grafana Datasources Secret failed") - } + gs, err = t.client.WaitForSecret(gs) + if err != nil { + return errors.Wrap(err, "waiting for Grafana Datasources Secret failed") + } - d := &manifests.GrafanaDatasources{} - err = json.Unmarshal(gs.Data["prometheus.yaml"], d) - if err != nil { - return errors.Wrap(err, "unmarshalling grafana datasource failed") - } + d := &manifests.GrafanaDatasources{} + err = json.Unmarshal(gs.Data["prometheus.yaml"], d) + if err != nil { + return errors.Wrap(err, "unmarshalling grafana datasource failed") + } - hs, err := t.factory.ThanosQuerierHtpasswdSecret(d.Datasources[0].BasicAuthPassword) - if err != nil { - return errors.Wrap(err, "initializing Thanos Querier htpasswd Secret failed") - } + basicAuthPassword := d.Datasources[0].BasicAuthPassword - err = t.client.CreateIfNotExistSecret(hs) - if err != nil { - return errors.Wrap(err, "creating Thanos Querier htpasswd Secret failed") + htpasswdSecret, err := t.factory.ThanosQuerierHtpasswdSecret(basicAuthPassword) + if err != nil { + return errors.Wrap(err, "initializing Thanos Querier htpasswd Secret failed") + } + + err = t.client.CreateOrUpdateSecret(htpasswdSecret) + if err != nil { + return errors.Wrap(err, "creating Thanos Querier htpasswd Secret failed") + } } rs, err := t.factory.ThanosQuerierRBACProxySecret() @@ -203,7 +208,11 @@ func (t *ThanosQuerierTask) Run() error { return errors.Wrap(err, "syncing Thanos Querier trusted CA bundle ConfigMap failed") } - dep, err := t.factory.ThanosQuerierDeployment(s, *t.config.ClusterMonitoringConfiguration.UserWorkloadEnabled, trustedCA) + dep, err := t.factory.ThanosQuerierDeployment( + s, + *t.config.ClusterMonitoringConfiguration.UserWorkloadEnabled, + trustedCA, + ) if err != nil { return errors.Wrap(err, "initializing Thanos Querier Deployment failed") } From ec2d198dcaba3250f38828273c72e49ce985c055 Mon Sep 17 00:00:00 2001 From: Brad Ison Date: Tue, 22 Jun 2021 15:18:04 +0200 Subject: [PATCH 2/4] tasks: Implement destroy method for Grafana This implements the destroy method for the Grafana task. It deletes all the resources created by the create method in the reverse order. --- pkg/client/client.go | 10 ++++ pkg/tasks/grafana.go | 139 ++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 147 insertions(+), 2 deletions(-) diff --git a/pkg/client/client.go b/pkg/client/client.go index 27c3d39a51..c83af3b25a 100644 --- a/pkg/client/client.go +++ b/pkg/client/client.go @@ -1024,6 +1024,16 @@ func (c *Client) CreateOrUpdateConfigMapList(cml *v1.ConfigMapList) error { return nil } +func (c *Client) DeleteConfigMapList(cml *v1.ConfigMapList) error { + for _, cm := range cml.Items { + err := c.DeleteConfigMap(&cm) + if err != nil { + return err + } + } + return nil +} + func (c *Client) CreateOrUpdateConfigMap(cm *v1.ConfigMap) error { cmClient := c.kclient.CoreV1().ConfigMaps(cm.GetNamespace()) existing, err := cmClient.Get(context.TODO(), cm.GetName(), metav1.GetOptions{}) diff --git a/pkg/tasks/grafana.go b/pkg/tasks/grafana.go index a095d4c840..dcfcab1c64 100644 --- a/pkg/tasks/grafana.go +++ b/pkg/tasks/grafana.go @@ -18,7 +18,6 @@ import ( "github.com/openshift/cluster-monitoring-operator/pkg/client" "github.com/openshift/cluster-monitoring-operator/pkg/manifests" "github.com/pkg/errors" - "k8s.io/klog/v2" ) type GrafanaTask struct { @@ -186,6 +185,142 @@ func (t *GrafanaTask) create() error { } func (t *GrafanaTask) destroy() error { - klog.V(2).Info("Grafana is disabled, but destroy not yet implemented.") + sm, err := t.factory.GrafanaServiceMonitor() + if err != nil { + return errors.Wrap(err, "initializing Grafana ServiceMonitor failed") + } + + err = t.client.DeleteServiceMonitor(sm) + if err != nil { + return errors.Wrap(err, "deleting Grafana ServiceMonitor failed") + } + + { + trustedCA, err := t.factory.GrafanaTrustedCABundle() + if err != nil { + return errors.Wrap(err, "initializing Grafana CA bundle ConfigMap failed") + } + + d, err := t.factory.GrafanaDeployment(trustedCA) + if err != nil { + return errors.Wrap(err, "initializing Grafana Deployment failed") + } + + err = t.client.DeleteDeployment(d) + if err != nil { + return errors.Wrap(err, "deleting Grafana Deployment failed") + } + + err = t.client.DeleteConfigMap(trustedCA) + if err != nil { + return errors.Wrap(err, "deleting Grafana CA bundle ConfigMap failed") + } + + err = t.client.DeleteHashedConfigMap(t.client.Namespace(), "grafana", "") + if err != nil { + return errors.Wrap(err, "deleting hashed Grafana CA bundle ConfigMap failed") + } + } + + svc, err := t.factory.GrafanaService() + if err != nil { + return errors.Wrap(err, "initializing Grafana Service failed") + } + + err = t.client.DeleteService(svc) + if err != nil { + return errors.Wrap(err, "deleting Grafana Service failed") + } + + sa, err := t.factory.GrafanaServiceAccount() + if err != nil { + return errors.Wrap(err, "initializing Grafana ServiceAccount failed") + } + + err = t.client.DeleteServiceAccount(sa) + if err != nil { + return errors.Wrap(err, "deleting Grafana ServiceAccount failed") + } + + cmdbs, err := t.factory.GrafanaDashboardSources() + if err != nil { + return errors.Wrap(err, "initializing Grafana Dashboard Sources ConfigMap failed") + } + + err = t.client.DeleteConfigMap(cmdbs) + if err != nil { + return errors.Wrap(err, "deleting Grafana Dashboard Sources ConfigMap failed") + } + + cmdds, err := t.factory.GrafanaDashboardDefinitions() + if err != nil { + return errors.Wrap(err, "initializing Grafana Dashboard Definitions ConfigMaps failed") + } + + err = t.client.DeleteConfigMapList(cmdds) + if err != nil { + return errors.Wrap(err, "deleting Grafana Dashboard Definitions ConfigMaps failed") + } + + sds, err := t.factory.GrafanaDatasources() + if err != nil { + return errors.Wrap(err, "initializing Grafana Datasources Secret failed") + } + + err = t.client.DeleteSecret(sds) + if err != nil { + return errors.Wrap(err, "deleting Grafana Datasources Secret failed") + } + + smc, err := t.factory.GrafanaConfig() + if err != nil { + return errors.Wrap(err, "initializing Grafana Config Secret failed") + } + + err = t.client.DeleteSecret(smc) + if err != nil { + return errors.Wrap(err, "deleting Grafana Config Secret failed") + } + + ps, err := t.factory.GrafanaProxySecret() + if err != nil { + return errors.Wrap(err, "initializing Grafana proxy Secret failed") + } + + err = t.client.DeleteSecret(ps) + if err != nil { + return errors.Wrap(err, "deleting Grafana proxy Secret failed") + } + + r, err := t.factory.GrafanaRoute() + if err != nil { + return errors.Wrap(err, "initializing Grafana Route failed") + } + + err = t.client.DeleteRoute(r) + if err != nil { + return errors.Wrap(err, "deleting Grafana Route failed") + } + + crb, err := t.factory.GrafanaClusterRoleBinding() + if err != nil { + return errors.Wrap(err, "initializing Grafana ClusterRoleBinding failed") + } + + err = t.client.DeleteClusterRoleBinding(crb) + if err != nil { + return errors.Wrap(err, "deleting Grafana ClusterRoleBinding failed") + } + + cr, err := t.factory.GrafanaClusterRole() + if err != nil { + return errors.Wrap(err, "initializing Grafana ClusterRole failed") + } + + err = t.client.DeleteClusterRole(cr) + if err != nil { + return errors.Wrap(err, "delete Grafana ClusterRole failed") + } + return nil } From 2fee7441171037145b18688a532d7681ccb23eef Mon Sep 17 00:00:00 2001 From: Brad Ison Date: Mon, 5 Jul 2021 12:08:06 +0200 Subject: [PATCH 3/4] test/e2e: Add test for config option to disable Grafana --- test/e2e/config_test.go | 54 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/test/e2e/config_test.go b/test/e2e/config_test.go index f166ef8890..3b7a7c2c08 100644 --- a/test/e2e/config_test.go +++ b/test/e2e/config_test.go @@ -26,6 +26,7 @@ import ( appsv1 "k8s.io/api/apps/v1" v1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) @@ -102,6 +103,59 @@ func TestClusterMonitoringOperatorConfiguration(t *testing.T) { assertOperatorCondition(t, configv1.OperatorAvailable, configv1.ConditionTrue) } +func TestGrafanaConfiguration(t *testing.T) { + config := &v1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: "cluster-monitoring-config", + Namespace: f.Ns, + }, + Data: map[string]string{ + "config.yaml": "grafana: { enabled: false }", + }, + } + + if err := f.OperatorClient.CreateOrUpdateConfigMap(config); err != nil { + t.Fatal(err) + } + + // Wait for Grafana deployment to disappear. + err := framework.Poll(time.Second, 5*time.Minute, func() error { + _, err := f.KubeClient.AppsV1().Deployments(f.Ns).Get(context.TODO(), "grafana", metav1.GetOptions{}) + if apierrors.IsNotFound(err) { + return nil + } + + return errors.New("Grafana deployment still exists") + }) + if err != nil { + t.Fatal(err) + } + + t.Log("asserting that CMO is healthy after disabling Grafana") + assertOperatorCondition(t, configv1.OperatorDegraded, configv1.ConditionFalse) + assertOperatorCondition(t, configv1.OperatorAvailable, configv1.ConditionTrue) + + // Push a default configuration that re-enables Grafana. + config.Data["config.yaml"] = "grafana: { enabled: true }" + + if err := f.OperatorClient.CreateOrUpdateConfigMap(config); err != nil { + t.Fatal(err) + } + + // Wait for Grafana deployment to appear. + err = framework.Poll(time.Second, 5*time.Minute, func() error { + _, err := f.KubeClient.AppsV1().Deployments(f.Ns).Get(context.TODO(), "grafana", metav1.GetOptions{}) + return err + }) + if err != nil { + t.Fatal(err) + } + + t.Log("asserting that CMO is healthy after re-enabling Grafana") + assertOperatorCondition(t, configv1.OperatorDegraded, configv1.ConditionFalse) + assertOperatorCondition(t, configv1.OperatorAvailable, configv1.ConditionTrue) +} + func assertOperatorCondition(t *testing.T, conditionType configv1.ClusterStatusConditionType, conditionStatus configv1.ConditionStatus) { t.Helper() From 91de7fb1a5592de185d39cba0b819357e64aa18c Mon Sep 17 00:00:00 2001 From: Brad Ison Date: Wed, 23 Jun 2021 13:18:04 +0200 Subject: [PATCH 4/4] Update CHANGELOG.md --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1eeb76e339..12be29ced0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # Note: This CHANGELOG is only for the monitoring team to track all monitoring related changes. Please see OpenShift release notes for official changes. +## 4.9 + +- [#1241](https://github.com/openshift/cluster-monitoring-operator/pull/1241) Add config option to disable Grafana deployment. + ## 4.8 - [#1087](https://github.com/openshift/cluster-monitoring-operator/pull/1087) Decrease alert severity to "warning" for ThanosQueryHttpRequestQueryErrorRateHigh and ThanosQueryHttpRequestQueryRangeErrorRateHigh alerts.