Skip to content

Commit aaf6681

Browse files
committed
MON-4057: Expose scrapeInterval setting for UWM Prometheus
Allows to configure scrapeInterval between 5s and 5m in `openshift-user-workload-monitoring/user-workload-monitoring-config` configmap under `prometheus` field. Signed-off-by: Jayapriya Pai <[email protected]>
1 parent 9018920 commit aaf6681

File tree

10 files changed

+161
-0
lines changed

10 files changed

+161
-0
lines changed

CHANGELOG.md

+4
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
# Note: This CHANGELOG is only for the monitoring team to track all monitoring related changes. Please see OpenShift release notes for official changes.
22

3+
## 4.18
4+
5+
- [#2503](https://github.com/openshift/cluster-monitoring-operator/issues/2503) Expose `scrapeInterval` setting for UWM Prometheus.
6+
37
## 4.17
48

59
- [#2409](https://github.com/openshift/cluster-monitoring-operator/issues/2409) Remove prometheus-adapter code from CMO

Documentation/api.md

+1
Original file line numberDiff line numberDiff line change
@@ -484,6 +484,7 @@ The `PrometheusRestrictedConfig` resource defines the settings for the Prometheu
484484

485485
| Property | Type | Description |
486486
| -------- | ---- | ----------- |
487+
| scrapeInterval | string | Configures the default interval between consecutive scrapes in case the ServiceMonitor or PodMonitor resource does not specify any value. The interval needs to be greater than or equal to 5 seconds and less than or equal to 5 minutes. The value can be expressed as: * seconds, for example `30s`. * minutes, for example `1m`. * a mix of minutes and seconds, for example `1m30s` The default value is `30s`. |
487488
| additionalAlertmanagerConfigs | [][AdditionalAlertmanagerConfig](#additionalalertmanagerconfig) | Configures additional Alertmanager instances that receive alerts from the Prometheus component. By default, no additional Alertmanager instances are configured. |
488489
| enforcedLabelLimit | *uint64 | Specifies a per-scrape limit on the number of labels accepted for a sample. If the number of labels exceeds this limit after metric relabeling, the entire scrape is treated as failed. The default value is `0`, which means that no limit is set. |
489490
| enforcedLabelNameLengthLimit | *uint64 | Specifies a per-scrape limit on the length of a label name for a sample. If the length of a label name exceeds this limit after metric relabeling, the entire scrape is treated as failed. The default value is `0`, which means that no limit is set. |

Documentation/openshiftdocs/modules/prometheusrestrictedconfig.adoc

+2
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@ Appears in: link:userworkloadconfiguration.adoc[UserWorkloadConfiguration]
1818
[options="header"]
1919
|===
2020
| Property | Type | Description
21+
|scrapeInterval|string|Configures the default interval between consecutive scrapes in case the ServiceMonitor or PodMonitor resource does not specify any value. The interval needs to be greater than or equal to 5 seconds and less than or equal to 5 minutes. The value can be expressed as: * seconds, for example `30s`. * minutes, for example `1m`. * a mix of minutes and seconds, for example `1m30s` The default value is `30s`.
22+
2123
|additionalAlertmanagerConfigs|[]link:additionalalertmanagerconfig.adoc[AdditionalAlertmanagerConfig]|Configures additional Alertmanager instances that receive alerts from the Prometheus component. By default, no additional Alertmanager instances are configured.
2224

2325
|enforcedLabelLimit|*uint64|Specifies a per-scrape limit on the number of labels accepted for a sample. If the number of labels exceeds this limit after metric relabeling, the entire scrape is treated as failed. The default value is `0`, which means that no limit is set.

pkg/manifests/config.go

+38
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ import (
2626

2727
"github.com/alecthomas/units"
2828
configv1 "github.com/openshift/api/config/v1"
29+
"github.com/prometheus/common/model"
2930
v1 "k8s.io/api/core/v1"
3031
k8syaml "k8s.io/apimachinery/pkg/util/yaml"
3132
auditv1 "k8s.io/apiserver/pkg/apis/audit/v1"
@@ -126,6 +127,35 @@ func (c Config) GetThanosRulerAlertmanagerConfigs() []AdditionalAlertmanagerConf
126127
return alertmanagerConfigs
127128
}
128129

130+
func scrapeIntervalLimits() (model.Duration, model.Duration) {
131+
lowerLimit, _ := model.ParseDuration("5s")
132+
upperLimit, _ := model.ParseDuration("5m")
133+
return lowerLimit, upperLimit
134+
}
135+
136+
func (c Config) checkUserWorkloadPrometheusConfig() error {
137+
if c.ClusterMonitoringConfiguration == nil || c.UserWorkloadConfiguration == nil {
138+
return nil
139+
}
140+
141+
if c.UserWorkloadConfiguration.Prometheus == nil || c.UserWorkloadConfiguration.Prometheus.ScrapeInterval == "" {
142+
return nil
143+
}
144+
145+
scrapeInterval, err := model.ParseDuration(c.UserWorkloadConfiguration.Prometheus.ScrapeInterval)
146+
147+
if err != nil {
148+
return fmt.Errorf("invalid scrape interval value: %w", err)
149+
}
150+
151+
allowedLowerLimit, allowedUpperLimit := scrapeIntervalLimits()
152+
153+
if (scrapeInterval < allowedLowerLimit) || (scrapeInterval > allowedUpperLimit) {
154+
return fmt.Errorf("scrape interval value %q outside of the allowed range [%q, %q]", c.UserWorkloadConfiguration.Prometheus.ScrapeInterval, allowedLowerLimit, allowedUpperLimit)
155+
}
156+
return nil
157+
}
158+
129159
type Images struct {
130160
MetricsServer string
131161
PromLabelProxy string
@@ -465,6 +495,14 @@ func (c *Config) Precheck() error {
465495
return nil
466496
}
467497

498+
func (c *Config) UserWorkloadPrecheck() error {
499+
if err := c.checkUserWorkloadPrometheusConfig(); err != nil {
500+
return err
501+
}
502+
503+
return nil
504+
}
505+
468506
func calculateBodySizeLimit(podCapacity int) string {
469507
const samplesPerPod = 400 // 400 samples per pod
470508
const sizePerSample = 200 // 200 Bytes

pkg/manifests/config_test.go

+62
Original file line numberDiff line numberDiff line change
@@ -318,6 +318,68 @@ func TestLoadEnforcedBodySizeLimit(t *testing.T) {
318318
}
319319
}
320320

321+
func TestScrapeIntervalUWMPreCheck(t *testing.T) {
322+
for _, tc := range []struct {
323+
name string
324+
uwmconfig string
325+
expectedError bool
326+
}{
327+
{
328+
name: "default",
329+
uwmconfig: "",
330+
expectedError: false,
331+
},
332+
{
333+
name: "scrapeInterval valid within limits",
334+
uwmconfig: `prometheus:
335+
scrapeInterval: 15s
336+
`,
337+
expectedError: false,
338+
},
339+
{
340+
name: "scrapeInterval valid within limits-mix-of-minutes-seconds",
341+
uwmconfig: `prometheus:
342+
scrapeInterval: 1m30s
343+
`,
344+
expectedError: false,
345+
},
346+
{
347+
name: "scrapeInterval < allowed lower limit",
348+
uwmconfig: `prometheus:
349+
scrapeInterval: 2s
350+
`,
351+
expectedError: true,
352+
},
353+
{
354+
name: "scrapeInterval > allowed upper limit",
355+
uwmconfig: `prometheus:
356+
scrapeInterval: 10m
357+
`,
358+
expectedError: true,
359+
},
360+
{
361+
name: "incorrect scrape interval value",
362+
uwmconfig: `prometheus:
363+
scrapeInterval: 1234www
364+
`,
365+
expectedError: true,
366+
},
367+
} {
368+
t.Run(tc.name, func(t *testing.T) {
369+
c := NewDefaultConfig()
370+
uwc, err := NewUserConfigFromString(tc.uwmconfig)
371+
require.NoError(t, err)
372+
c.UserWorkloadConfiguration = uwc
373+
err = c.UserWorkloadPrecheck()
374+
if err != nil && tc.expectedError {
375+
return
376+
}
377+
378+
require.NoError(t, err)
379+
})
380+
}
381+
}
382+
321383
func TestCollectionProfilePreCheck(t *testing.T) {
322384
for _, tc := range []struct {
323385
name string

pkg/manifests/manifests.go

+4
Original file line numberDiff line numberDiff line change
@@ -1658,6 +1658,10 @@ func (f *Factory) PrometheusUserWorkload(grpcTLS *v1.Secret) (*monv1.Prometheus,
16581658
if err != nil {
16591659
return nil, err
16601660
}
1661+
if f.config.UserWorkloadConfiguration.Prometheus.ScrapeInterval != "" {
1662+
p.Spec.ScrapeInterval = monv1.Duration(f.config.UserWorkloadConfiguration.Prometheus.ScrapeInterval)
1663+
}
1664+
16611665
if f.config.UserWorkloadConfiguration.Prometheus.LogLevel != "" {
16621666
p.Spec.LogLevel = f.config.UserWorkloadConfiguration.Prometheus.LogLevel
16631667
}

pkg/manifests/manifests_test.go

+5
Original file line numberDiff line numberDiff line change
@@ -1643,6 +1643,7 @@ func TestPrometheusUserWorkloadConfiguration(t *testing.T) {
16431643
c := NewDefaultConfig()
16441644

16451645
uwc, err := NewUserConfigFromString(`prometheus:
1646+
scrapeInterval: 15s
16461647
resources:
16471648
requests:
16481649
cpu: 100m
@@ -1670,6 +1671,10 @@ func TestPrometheusUserWorkloadConfiguration(t *testing.T) {
16701671
t.Fatal(err)
16711672
}
16721673

1674+
if p.Spec.ScrapeInterval != "15s" {
1675+
t.Fatal("Prometheus UWM scrapeInterval not configured correctly")
1676+
}
1677+
16731678
if p.Spec.TopologySpreadConstraints[0].MaxSkew != 1 {
16741679
t.Fatal("Prometheus UWM spread constraints MaxSkew not configured correctly")
16751680
}

pkg/manifests/types.go

+8
Original file line numberDiff line numberDiff line change
@@ -601,6 +601,14 @@ type AlertmanagerUserWorkloadConfig struct {
601601
// The `PrometheusRestrictedConfig` resource defines the settings for the
602602
// Prometheus component that monitors user-defined projects.
603603
type PrometheusRestrictedConfig struct {
604+
// Configures the default interval between consecutive scrapes in case the ServiceMonitor or PodMonitor resource does not specify any value.
605+
// The interval needs to be greater than or equal to 5 seconds and less than or equal to 5 minutes.
606+
// The value can be expressed as:
607+
// * seconds, for example `30s`.
608+
// * minutes, for example `1m`.
609+
// * a mix of minutes and seconds, for example `1m30s`
610+
// The default value is `30s`.
611+
ScrapeInterval string `json:"scrapeInterval,omitempty"`
604612
// Configures additional Alertmanager instances that receive alerts from
605613
// the Prometheus component. By default, no additional Alertmanager
606614
// instances are configured.

pkg/operator/operator.go

+6
Original file line numberDiff line numberDiff line change
@@ -1018,6 +1018,7 @@ func (o *Operator) Config(ctx context.Context, key string) (*manifests.Config, e
10181018
if err != nil {
10191019
return nil, err
10201020
}
1021+
10211022
err = c.Precheck()
10221023
if err != nil {
10231024
return nil, err
@@ -1034,6 +1035,11 @@ func (o *Operator) Config(ctx context.Context, key string) (*manifests.Config, e
10341035
}
10351036
}
10361037

1038+
err = c.UserWorkloadPrecheck()
1039+
if err != nil {
1040+
return nil, fmt.Errorf("%w: %w", ErrUserWorkloadInvalidConfiguration, err)
1041+
}
1042+
10371043
err = c.LoadEnforcedBodySizeLimit(o.client, ctx)
10381044
if err != nil {
10391045
c.ClusterMonitoringConfiguration.PrometheusK8sConfig.EnforcedBodySizeLimit = ""

test/e2e/config_test.go

+31
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,9 @@ import (
3030
"github.com/openshift/cluster-monitoring-operator/test/e2e/framework"
3131
"github.com/stretchr/testify/require"
3232

33+
monv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
3334
v1 "k8s.io/api/core/v1"
35+
3436
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
3537
)
3638

@@ -585,6 +587,7 @@ func TestUserWorkloadMonitorPrometheusK8Config(t *testing.T) {
585587

586588
uwmCM := f.BuildUserWorkloadConfigMap(t,
587589
fmt.Sprintf(`prometheus:
590+
scrapeInterval: 15s
588591
enforcedTargetLimit: 10
589592
enforcedLabelLimit: 500
590593
enforcedLabelNameLengthLimit: 50
@@ -671,6 +674,10 @@ func TestUserWorkloadMonitorPrometheusK8Config(t *testing.T) {
671674
name: "assert query log file value is set and correct",
672675
assertion: assertQueryLogValueEquals(f.UserWorkloadMonitoringNs, crName, "/tmp/test.log"),
673676
},
677+
{
678+
name: "assert scrape interval is configured",
679+
assertion: assertScrapeInterval("15s"),
680+
},
674681
} {
675682
t.Run(tc.name, tc.assertion)
676683
}
@@ -1028,6 +1035,30 @@ func assertRemoteWriteWasSet(namespace, crName, urlValue string) func(t *testing
10281035
}
10291036
}
10301037

1038+
func assertScrapeInterval(scrapeInterval string) func(*testing.T) {
1039+
ctx := context.Background()
1040+
return func(t *testing.T) {
1041+
err := framework.Poll(time.Second, 5*time.Minute, func() error {
1042+
p, err := f.MonitoringClient.Prometheuses(f.UserWorkloadMonitoringNs).Get(ctx, "user-workload", metav1.GetOptions{})
1043+
if err != nil {
1044+
return err
1045+
}
1046+
1047+
if p.Spec.ScrapeInterval == "" {
1048+
return errors.New("scrapeInterval is not set")
1049+
} else if p.Spec.ScrapeInterval != monv1.Duration(scrapeInterval) {
1050+
return fmt.Errorf("expected scrapeInterval to be %s, but got %s", scrapeInterval, p.Spec.ScrapeInterval)
1051+
}
1052+
1053+
return nil
1054+
})
1055+
1056+
if err != nil {
1057+
t.Fatalf("Timed out waiting for scrapeInterval configuration: %v", err)
1058+
}
1059+
}
1060+
}
1061+
10311062
func assertEnforcedTargetLimit(limit uint64) func(*testing.T) {
10321063
ctx := context.Background()
10331064
return func(t *testing.T) {

0 commit comments

Comments
 (0)