From 8baa71708c4200be41117f4e3e682754935745d3 Mon Sep 17 00:00:00 2001 From: Philip Gough Date: Fri, 10 Sep 2021 14:59:54 +0100 Subject: [PATCH 1/3] manifests: Enable query log file config for Prometheus Prometheus and via prometheus-operator, provide the ability to log all PromQL queries to a file. This change enables CMO to support passing through that feature to Prometheus CR and the Prometheus pod for both platform monitoring and UWM. https://prometheus.io/docs/guides/query-log/ --- pkg/manifests/config.go | 2 ++ pkg/manifests/manifests.go | 8 ++++++++ pkg/manifests/manifests_test.go | 5 +++++ 3 files changed, 15 insertions(+) diff --git a/pkg/manifests/config.go b/pkg/manifests/config.go index 85a442c530..47935d64b2 100644 --- a/pkg/manifests/config.go +++ b/pkg/manifests/config.go @@ -178,6 +178,7 @@ type PrometheusK8sConfig struct { RemoteWrite []RemoteWriteSpec `json:"remoteWrite"` TelemetryMatches []string `json:"-"` AlertmanagerConfigs []AdditionalAlertmanagerConfig `json:"additionalAlertmanagerConfigs"` + QueryLogFile string `json:"queryLogFile"` } type AdditionalAlertmanagerConfig struct { @@ -508,6 +509,7 @@ type PrometheusRestrictedConfig struct { EnforcedSampleLimit *uint64 `json:"enforcedSampleLimit"` EnforcedTargetLimit *uint64 `json:"enforcedTargetLimit"` AlertmanagerConfigs []AdditionalAlertmanagerConfig `json:"additionalAlertmanagerConfigs"` + QueryLogFile string `json:"queryLogFile"` } func (u *UserWorkloadConfiguration) applyDefaults() { diff --git a/pkg/manifests/manifests.go b/pkg/manifests/manifests.go index 9038f26e86..e0a149d94d 100644 --- a/pkg/manifests/manifests.go +++ b/pkg/manifests/manifests.go @@ -1376,6 +1376,10 @@ func (f *Factory) PrometheusK8s(host string, grpcTLS *v1.Secret, trustedCABundle } } + if f.config.ClusterMonitoringConfiguration.PrometheusK8sConfig.QueryLogFile != "" { + p.Spec.QueryLogFile = f.config.ClusterMonitoringConfiguration.PrometheusK8sConfig.QueryLogFile + } + telemetryEnabled := f.config.ClusterMonitoringConfiguration.TelemeterClientConfig.IsEnabled() if telemetryEnabled && f.config.RemoteWrite { @@ -1670,6 +1674,10 @@ func (f *Factory) PrometheusUserWorkload(grpcTLS *v1.Secret) (*monv1.Prometheus, p.Spec.Thanos.Image = &f.config.Images.Thanos } + if f.config.UserWorkloadConfiguration.Prometheus.QueryLogFile != "" { + p.Spec.QueryLogFile = f.config.UserWorkloadConfiguration.Prometheus.QueryLogFile + } + for i, container := range p.Spec.Containers { if container.Name == "kube-rbac-proxy" || container.Name == "kube-rbac-proxy-thanos" { p.Spec.Containers[i].Image = f.config.Images.KubeRbacProxy diff --git a/pkg/manifests/manifests_test.go b/pkg/manifests/manifests_test.go index 657e64066c..d549a86688 100644 --- a/pkg/manifests/manifests_test.go +++ b/pkg/manifests/manifests_test.go @@ -1035,6 +1035,7 @@ func TestPrometheusK8sConfiguration(t *testing.T) { datacenter: eu-west remoteWrite: - url: "https://test.remotewrite.com/api/write" + queryLogFile: /tmp/test ingress: baseAddress: monitoring-demo.staging.core-os.net `) @@ -1168,6 +1169,10 @@ ingress: if p.Spec.RemoteWrite[0].URL != "https://test.remotewrite.com/api/write" { t.Fatal("Prometheus remote-write is not configured correctly") } + + if p.Spec.QueryLogFile != "/tmp/test" { + t.Fatal("Prometheus query log is not configured correctly") + } } func TestPrometheusK8sAdditionalAlertManagerConfigsSecret(t *testing.T) { From 71a1ea6991a075781e404d32c993ee0e15d4b9af Mon Sep 17 00:00:00 2001 From: Philip Gough Date: Fri, 10 Sep 2021 15:03:21 +0100 Subject: [PATCH 2/3] test:e2e: Ensure QueryLogFile is set in spec --- test/e2e/config_test.go | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/test/e2e/config_test.go b/test/e2e/config_test.go index b0813b36e8..b2b467502d 100644 --- a/test/e2e/config_test.go +++ b/test/e2e/config_test.go @@ -151,6 +151,7 @@ func TestClusterMonitorPrometheusK8Config(t *testing.T) { data := fmt.Sprintf(`prometheusK8s: logLevel: debug retention: 10h + queryLogFile: /tmp/test.log tolerations: - operator: "Exists" externalLabels: @@ -199,6 +200,10 @@ func TestClusterMonitorPrometheusK8Config(t *testing.T) { name: "assert remote write url value in set in CR", assertion: assertRemoteWriteWasSet(f.Ns, crName, "https://test.remotewrite.com/api/write"), }, + { + name: "assert query log file value is set and correct", + assertion: assertQueryLogValueEquals(f.Ns, crName, "/tmp/test.log"), + }, { name: "assert rule for Thanos sidecar exists", assertion: f.AssertPrometheusRuleExists(thanosRule, f.Ns), @@ -523,6 +528,7 @@ func TestUserWorkloadMonitorPrometheusK8Config(t *testing.T) { enforcedTargetLimit: 10 logLevel: debug retention: 10h + queryLogFile: /tmp/test.log tolerations: - operator: "Exists" externalLabels: @@ -577,6 +583,10 @@ func TestUserWorkloadMonitorPrometheusK8Config(t *testing.T) { name: "assert enforced target limit is configured", assertion: assertEnforcedTargetLimit(10), }, + { + name: "assert query log file value is set and correct", + assertion: assertQueryLogValueEquals(f.UserWorkloadMonitoringNs, crName, "/tmp/test.log"), + }, } { t.Run(tc.name, tc.assertion) } @@ -813,3 +823,25 @@ func assertEnforcedTargetLimit(limit uint64) func(*testing.T) { } } } + +func assertQueryLogValueEquals(namespace, crName, value string) func(t *testing.T) { + return func(t *testing.T) { + err := framework.Poll(time.Second, time.Minute*5, func() error { + prom, err := f.MonitoringClient.Prometheuses(namespace).Get(context.Background(), crName, metav1.GetOptions{}) + if err != nil { + t.Fatal("failed to get required prometheus cr", err) + } + + if prom.Spec.QueryLogFile != value { + return fmt.Errorf( + "expected query log file value not found wanted '%s', got '%s'", + value, prom.Spec.QueryLogFile, + ) + } + return nil + }) + if err != nil { + t.Fatal(err) + } + } +} From 0da2132b1e41b833fb5a5792c24917cdecf4934c Mon Sep 17 00:00:00 2001 From: Philip Gough Date: Fri, 10 Sep 2021 15:04:01 +0100 Subject: [PATCH 3/3] docs: Describe the `queryLogFile` setting We want to allow cluster admins to enable the query log, however it should be noted that this is a temporary solution for debugging situations ad-hoc and should not be enabled permanently. https://prometheus.io/docs/guides/query-log/ --- CHANGELOG.md | 1 + .../configuring-cluster-monitoring.md | 4 +++ examples/user-workload/README.md | 35 ++++++++++--------- 3 files changed, 23 insertions(+), 17 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9bfe8049ee..c4cfa65f52 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ - [#1439](https://github.com/openshift/cluster-monitoring-operator/pull/1439) Expose PodDisruptionBudget labels from kube-state-metrics metrics. - [#1377](https://github.com/openshift/cluster-monitoring-operator/pull/1377) Allow OpenShift users to configure audit logs for prometheus-adapter - [#1481](https://github.com/openshift/cluster-monitoring-operator/pull/1481) Removing one of the AlertmanagerClusterFailedToSendAlerts alerts +- [#1373](https://github.com/openshift/cluster-monitoring-operator/pull/1373) Enable admins to toggle the [query_log_file](https://prometheus.io/docs/guides/query-log/#enable-the-query-log) setting for Prometheus. ## 4.9 diff --git a/Documentation/user-guides/configuring-cluster-monitoring.md b/Documentation/user-guides/configuring-cluster-monitoring.md index 220801bd37..4e0646bf1f 100644 --- a/Documentation/user-guides/configuring-cluster-monitoring.md +++ b/Documentation/user-guides/configuring-cluster-monitoring.md @@ -83,6 +83,10 @@ resources: [v1.ResourceRequirements](https://kubernetes.io/docs/api-reference/v1 # specified by users externalLabels: [ - : ] +# log all the queries run by the engine to a log file +# this option should be enabled temporarily only to support debugging +# as there is no option to support or manage log rotation +queryLogFile: string ``` ### AlertmanagerMainConfig diff --git a/examples/user-workload/README.md b/examples/user-workload/README.md index 2d218117a2..e99c17c71f 100644 --- a/examples/user-workload/README.md +++ b/examples/user-workload/README.md @@ -18,25 +18,26 @@ data: Configuration example is located in this directory, with the following supported configuration fields: ``` prometheusOperator: -logLevel string -nodeSelector map[string]string -tolerations []v1.Toleration + logLevel string + nodeSelector map[string]string + tolerations []v1.Toleration thanosRuler: -logLevel string -nodeSelector map[string]string -tolerations []v1.Toleration -resources *v1.ResourceRequirements -volumeClaimTemplate *v1.PersistentVolumeClaim + logLevel string + nodeSelector map[string]string + tolerations []v1.Toleration + resources *v1.ResourceRequirements + volumeClaimTemplate *v1.PersistentVolumeClaim prometheus: -logLevel string -nodeSelector map[string]string -tolerations []v1.Toleration -retention string -resources *v1.ResourceRequirements -externalLabels map[string]string -volumeClaimTemplate *v1.PersistentVolumeClaim -hostport string -remoteWrite []monv1.RemoteWriteSpec + logLevel string + nodeSelector map[string]string + tolerations []v1.Toleration + retention string + resources *v1.ResourceRequirements + externalLabels map[string]string + volumeClaimTemplate *v1.PersistentVolumeClaim + hostport string + remoteWrite []monv1.RemoteWriteSpec + queryLogFile string ```