Skip to content

Commit b2f203f

Browse files
committed
MON-4057: Expose scrapeInterval setting for UWM Prometheus
Allows configuring scrapeInterval between 5s and 5m in the `openshift-user-workload-monitoring/user-workload-monitoring-config` configmap under the `prometheus` field. Signed-off-by: Jayapriya Pai <[email protected]>
1 parent 1e90772 commit b2f203f

File tree

10 files changed

+147
-0
lines changed

10 files changed

+147
-0
lines changed

CHANGELOG.md

+4
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
# Note: This CHANGELOG is only for the monitoring team to track all monitoring related changes. Please see OpenShift release notes for official changes.
22

3+
## 4.18
4+
5+
- [#2503](https://github.com/openshift/cluster-monitoring-operator/issues/2503) Expose `scrapeInterval` setting for UWM Prometheus.
6+
37
## 4.17
48

59
- [#2409](https://github.com/openshift/cluster-monitoring-operator/issues/2409) Remove prometheus-adapter code from CMO

Documentation/api.md

+1
Original file line numberDiff line numberDiff line change
@@ -484,6 +484,7 @@ The `PrometheusRestrictedConfig` resource defines the settings for the Prometheu
484484

485485
| Property | Type | Description |
486486
| -------- | ---- | ----------- |
487+
| scrapeInterval | string | Configures the default interval between consecutive scrapes in case the `ServiceMonitor` or `PodMonitor` resource does not specify any value. The interval must be set between 5 seconds and 5 minutes. The value can be expressed in: seconds (for example `30s`), minutes (for example `1m`) or a mix of minutes and seconds (for example `1m30s`). The default value is `30s`. |
487488
| additionalAlertmanagerConfigs | [][AdditionalAlertmanagerConfig](#additionalalertmanagerconfig) | Configures additional Alertmanager instances that receive alerts from the Prometheus component. By default, no additional Alertmanager instances are configured. |
488489
| enforcedLabelLimit | *uint64 | Specifies a per-scrape limit on the number of labels accepted for a sample. If the number of labels exceeds this limit after metric relabeling, the entire scrape is treated as failed. The default value is `0`, which means that no limit is set. |
489490
| enforcedLabelNameLengthLimit | *uint64 | Specifies a per-scrape limit on the length of a label name for a sample. If the length of a label name exceeds this limit after metric relabeling, the entire scrape is treated as failed. The default value is `0`, which means that no limit is set. |

Documentation/openshiftdocs/modules/prometheusrestrictedconfig.adoc

+2
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@ Appears in: link:userworkloadconfiguration.adoc[UserWorkloadConfiguration]
1818
[options="header"]
1919
|===
2020
| Property | Type | Description
21+
|scrapeInterval|string|Configures the default interval between consecutive scrapes in case the `ServiceMonitor` or `PodMonitor` resource does not specify any value. The interval must be set between 5 seconds and 5 minutes. The value can be expressed in: seconds (for example `30s`), minutes (for example `1m`) or a mix of minutes and seconds (for example `1m30s`). The default value is `30s`.
22+
2123
|additionalAlertmanagerConfigs|[]link:additionalalertmanagerconfig.adoc[AdditionalAlertmanagerConfig]|Configures additional Alertmanager instances that receive alerts from the Prometheus component. By default, no additional Alertmanager instances are configured.
2224

2325
|enforcedLabelLimit|*uint64|Specifies a per-scrape limit on the number of labels accepted for a sample. If the number of labels exceeds this limit after metric relabeling, the entire scrape is treated as failed. The default value is `0`, which means that no limit is set.

pkg/manifests/config.go

+35
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ import (
2424

2525
"github.com/alecthomas/units"
2626
configv1 "github.com/openshift/api/config/v1"
27+
"github.com/prometheus/common/model"
2728
v1 "k8s.io/api/core/v1"
2829
k8syaml "k8s.io/apimachinery/pkg/util/yaml"
2930
auditv1 "k8s.io/apiserver/pkg/apis/audit/v1"
@@ -126,6 +127,35 @@ func (c Config) GetThanosRulerAlertmanagerConfigs() []AdditionalAlertmanagerConf
126127
return alertmanagerConfigs
127128
}
128129

130+
func scrapeIntervalLimits() (model.Duration, model.Duration) {
131+
lowerLimit, _ := model.ParseDuration("5s")
132+
upperLimit, _ := model.ParseDuration("5m")
133+
return lowerLimit, upperLimit
134+
}
135+
136+
func (u *UserWorkloadConfiguration) check() error {
137+
if u == nil {
138+
return nil
139+
}
140+
141+
if u.Prometheus == nil || u.Prometheus.ScrapeInterval == "" {
142+
return nil
143+
}
144+
145+
scrapeInterval, err := model.ParseDuration(u.Prometheus.ScrapeInterval)
146+
147+
if err != nil {
148+
return fmt.Errorf("invalid scrape interval value: %w", err)
149+
}
150+
151+
allowedLowerLimit, allowedUpperLimit := scrapeIntervalLimits()
152+
153+
if (scrapeInterval < allowedLowerLimit) || (scrapeInterval > allowedUpperLimit) {
154+
return fmt.Errorf("scrape interval value %q outside of the allowed range [%q, %q]", u.Prometheus.ScrapeInterval, allowedLowerLimit, allowedUpperLimit)
155+
}
156+
return nil
157+
}
158+
129159
type Images struct {
130160
MetricsServer string
131161
PromLabelProxy string
@@ -541,6 +571,11 @@ func NewUserConfigFromString(content string) (*UserWorkloadConfiguration, error)
541571
}
542572

543573
u.applyDefaults()
574+
575+
if err := u.check(); err != nil {
576+
return nil, err
577+
}
578+
544579
return u, nil
545580
}
546581

pkg/manifests/config_test.go

+58
Original file line numberDiff line numberDiff line change
@@ -484,6 +484,64 @@ func TestLoadEnforcedBodySizeLimit(t *testing.T) {
484484
}
485485
}
486486

487+
func TestScrapeIntervalUWMPreCheck(t *testing.T) {
488+
for _, tc := range []struct {
489+
name string
490+
uwmconfig string
491+
expectedError bool
492+
}{
493+
{
494+
name: "default",
495+
uwmconfig: "",
496+
expectedError: false,
497+
},
498+
{
499+
name: "scrapeInterval valid within limits",
500+
uwmconfig: `prometheus:
501+
scrapeInterval: 15s
502+
`,
503+
expectedError: false,
504+
},
505+
{
506+
name: "scrapeInterval valid within limits-mix-of-minutes-seconds",
507+
uwmconfig: `prometheus:
508+
scrapeInterval: 1m30s
509+
`,
510+
expectedError: false,
511+
},
512+
{
513+
name: "scrapeInterval < allowed lower limit",
514+
uwmconfig: `prometheus:
515+
scrapeInterval: 2s
516+
`,
517+
expectedError: true,
518+
},
519+
{
520+
name: "scrapeInterval > allowed upper limit",
521+
uwmconfig: `prometheus:
522+
scrapeInterval: 10m
523+
`,
524+
expectedError: true,
525+
},
526+
{
527+
name: "incorrect scrape interval value",
528+
uwmconfig: `prometheus:
529+
scrapeInterval: 1234www
530+
`,
531+
expectedError: true,
532+
},
533+
} {
534+
t.Run(tc.name, func(t *testing.T) {
535+
_, err := NewUserConfigFromString(tc.uwmconfig)
536+
if tc.expectedError {
537+
require.Error(t, err)
538+
return
539+
}
540+
require.NoError(t, err)
541+
})
542+
}
543+
}
544+
487545
func TestCollectionProfilePreCheck(t *testing.T) {
488546
for _, tc := range []struct {
489547
name string

pkg/manifests/manifests.go

+4
Original file line numberDiff line numberDiff line change
@@ -1658,6 +1658,10 @@ func (f *Factory) PrometheusUserWorkload(grpcTLS *v1.Secret) (*monv1.Prometheus,
16581658
if err != nil {
16591659
return nil, err
16601660
}
1661+
if f.config.UserWorkloadConfiguration.Prometheus.ScrapeInterval != "" {
1662+
p.Spec.ScrapeInterval = monv1.Duration(f.config.UserWorkloadConfiguration.Prometheus.ScrapeInterval)
1663+
}
1664+
16611665
if f.config.UserWorkloadConfiguration.Prometheus.LogLevel != "" {
16621666
p.Spec.LogLevel = f.config.UserWorkloadConfiguration.Prometheus.LogLevel
16631667
}

pkg/manifests/manifests_test.go

+5
Original file line numberDiff line numberDiff line change
@@ -1639,6 +1639,7 @@ func TestPrometheusUserWorkloadConfiguration(t *testing.T) {
16391639
c := NewDefaultConfig()
16401640

16411641
uwc, err := NewUserConfigFromString(`prometheus:
1642+
scrapeInterval: 15s
16421643
resources:
16431644
requests:
16441645
cpu: 100m
@@ -1666,6 +1667,10 @@ func TestPrometheusUserWorkloadConfiguration(t *testing.T) {
16661667
t.Fatal(err)
16671668
}
16681669

1670+
if p.Spec.ScrapeInterval != "15s" {
1671+
t.Fatal("Prometheus UWM scrapeInterval not configured correctly")
1672+
}
1673+
16691674
if p.Spec.TopologySpreadConstraints[0].MaxSkew != 1 {
16701675
t.Fatal("Prometheus UWM spread constraints MaxSkew not configured correctly")
16711676
}

pkg/manifests/types.go

+6
Original file line numberDiff line numberDiff line change
@@ -601,6 +601,12 @@ type AlertmanagerUserWorkloadConfig struct {
601601
// The `PrometheusRestrictedConfig` resource defines the settings for the
602602
// Prometheus component that monitors user-defined projects.
603603
type PrometheusRestrictedConfig struct {
604+
// Configures the default interval between consecutive scrapes in case the `ServiceMonitor` or `PodMonitor` resource does not specify any value.
605+
// The interval must be set between 5 seconds and 5 minutes.
606+
// The value can be expressed in:
607+
// seconds (for example `30s`), minutes (for example `1m`) or a mix of minutes and seconds (for example `1m30s`).
608+
// The default value is `30s`.
609+
ScrapeInterval string `json:"scrapeInterval,omitempty"`
604610
// Configures additional Alertmanager instances that receive alerts from
605611
// the Prometheus component. By default, no additional Alertmanager
606612
// instances are configured.

pkg/operator/operator.go

+1
Original file line numberDiff line numberDiff line change
@@ -995,6 +995,7 @@ func (o *Operator) Config(ctx context.Context, key string) (*manifests.Config, e
995995
if err != nil {
996996
return nil, err
997997
}
998+
998999
err = c.Precheck()
9991000
if err != nil {
10001001
return nil, err

test/e2e/config_test.go

+31
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,9 @@ import (
3030
"github.com/openshift/cluster-monitoring-operator/test/e2e/framework"
3131
"github.com/stretchr/testify/require"
3232

33+
monv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
3334
v1 "k8s.io/api/core/v1"
35+
3436
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
3537
)
3638

@@ -585,6 +587,7 @@ func TestUserWorkloadMonitorPrometheusK8Config(t *testing.T) {
585587

586588
uwmCM := f.BuildUserWorkloadConfigMap(t,
587589
fmt.Sprintf(`prometheus:
590+
scrapeInterval: 15s
588591
enforcedTargetLimit: 10
589592
enforcedLabelLimit: 500
590593
enforcedLabelNameLengthLimit: 50
@@ -671,6 +674,10 @@ func TestUserWorkloadMonitorPrometheusK8Config(t *testing.T) {
671674
name: "assert query log file value is set and correct",
672675
assertion: assertQueryLogValueEquals(f.UserWorkloadMonitoringNs, crName, "/tmp/test.log"),
673676
},
677+
{
678+
name: "assert scrape interval is configured",
679+
assertion: assertScrapeInterval("15s"),
680+
},
674681
} {
675682
t.Run(tc.name, tc.assertion)
676683
}
@@ -1028,6 +1035,30 @@ func assertRemoteWriteWasSet(namespace, crName, urlValue string) func(t *testing
10281035
}
10291036
}
10301037

1038+
func assertScrapeInterval(scrapeInterval string) func(*testing.T) {
1039+
ctx := context.Background()
1040+
return func(t *testing.T) {
1041+
err := framework.Poll(time.Second, 5*time.Minute, func() error {
1042+
p, err := f.MonitoringClient.Prometheuses(f.UserWorkloadMonitoringNs).Get(ctx, "user-workload", metav1.GetOptions{})
1043+
if err != nil {
1044+
return err
1045+
}
1046+
1047+
if p.Spec.ScrapeInterval == "" {
1048+
return errors.New("scrapeInterval is not set")
1049+
} else if p.Spec.ScrapeInterval != monv1.Duration(scrapeInterval) {
1050+
return fmt.Errorf("expected scrapeInterval to be %s, but got %s", scrapeInterval, p.Spec.ScrapeInterval)
1051+
}
1052+
1053+
return nil
1054+
})
1055+
1056+
if err != nil {
1057+
t.Fatalf("Timed out waiting for scrapeInterval configuration: %v", err)
1058+
}
1059+
}
1060+
}
1061+
10311062
func assertEnforcedTargetLimit(limit uint64) func(*testing.T) {
10321063
ctx := context.Background()
10331064
return func(t *testing.T) {

0 commit comments

Comments
 (0)