Commit 1ab34a5
MON-4057: Expose scrapeInterval setting for UWM Prometheus
Allows configuring the scrape interval between 5s and 5m via the `prometheus` field of the `user-workload-monitoring-config` ConfigMap in the `openshift-user-workload-monitoring` namespace.

Signed-off-by: Jayapriya Pai <[email protected]>
1 parent 9018920 commit 1ab34a5
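
For illustration, opting into the new setting would look something like this in the UWM ConfigMap (a minimal sketch; `15s` is an arbitrary in-range value, and `config.yaml` is the key the operator reads UWM configuration from):

```yaml
apiVersion: v1
kind: ConfigMap
metadata:
  name: user-workload-monitoring-config
  namespace: openshift-user-workload-monitoring
data:
  config.yaml: |
    prometheus:
      scrapeInterval: 15s
```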

10 files changed: +133 −2 lines

CHANGELOG.md (+4)

@@ -1,5 +1,9 @@
 # Note: This CHANGELOG is only for the monitoring team to track all monitoring related changes. Please see OpenShift release notes for official changes.
 
+## 4.18
+
+- [#2503](https://github.com/openshift/cluster-monitoring-operator/issues/2503) Expose `scrapeInterval` setting for UWM Prometheus.
+
 ## 4.17
 
 - [#2409](https://github.com/openshift/cluster-monitoring-operator/issues/2409) Remove prometheus-adapter code from CMO

Documentation/api.md (+1)

@@ -484,6 +484,7 @@ The `PrometheusRestrictedConfig` resource defines the settings for the Prometheu
 
 | Property | Type | Description |
 | -------- | ---- | ----------- |
+| scrapeInterval | string | Configures the default interval between consecutive scrapes in case the ServiceMonitor or PodMonitor resource does not specify any value. The default value is `30s`. |
 | additionalAlertmanagerConfigs | [][AdditionalAlertmanagerConfig](#additionalalertmanagerconfig) | Configures additional Alertmanager instances that receive alerts from the Prometheus component. By default, no additional Alertmanager instances are configured. |
 | enforcedLabelLimit | *uint64 | Specifies a per-scrape limit on the number of labels accepted for a sample. If the number of labels exceeds this limit after metric relabeling, the entire scrape is treated as failed. The default value is `0`, which means that no limit is set. |
 | enforcedLabelNameLengthLimit | *uint64 | Specifies a per-scrape limit on the length of a label name for a sample. If the length of a label name exceeds this limit after metric relabeling, the entire scrape is treated as failed. The default value is `0`, which means that no limit is set. |

Documentation/openshiftdocs/modules/prometheusrestrictedconfig.adoc (+2)

@@ -18,6 +18,8 @@ Appears in: link:userworkloadconfiguration.adoc[UserWorkloadConfiguration]
 [options="header"]
 |===
 | Property | Type | Description
+|scrapeInterval|string|Configures the default interval between consecutive scrapes in case the ServiceMonitor or PodMonitor resource does not specify any value. The default value is `30s`.
+
 |additionalAlertmanagerConfigs|[]link:additionalalertmanagerconfig.adoc[AdditionalAlertmanagerConfig]|Configures additional Alertmanager instances that receive alerts from the Prometheus component. By default, no additional Alertmanager instances are configured.
 
 |enforcedLabelLimit|*uint64|Specifies a per-scrape limit on the number of labels accepted for a sample. If the number of labels exceeds this limit after metric relabeling, the entire scrape is treated as failed. The default value is `0`, which means that no limit is set.

hack/build-jsonnet.sh (+1 −1)

@@ -37,7 +37,7 @@ for file in "${files[@]}"; do
     }&
 
     # wait for at least one of the jobs to finish if there are more than maxProc jobs
-    while [[ $(jobs -r | wc -l ) -ge "$maxProc" ]]; do wait -n; done
+    while [[ $(jobs -r | wc -l ) -ge "$maxProc" ]]; do wait; done
 done
 # wait for all jobs to finish
 wait
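
A note on this hunk: `wait -n` (bash 4.3+) returns as soon as any one background job exits, while plain `wait` blocks until all of them do, so the loop now drains the whole batch before launching more work. A minimal sketch of the difference (the `sleep` jobs are purely illustrative):

```bash
#!/usr/bin/env bash
# Three background jobs of different lengths.
sleep 1 & sleep 2 & sleep 3 &

wait -n   # returns after ~1s, when the first job finishes (bash 4.3+)
wait      # returns only once all remaining jobs have exited (~3s total)
```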

pkg/manifests/config.go (+25)

@@ -26,6 +26,7 @@ import (
 
     "github.com/alecthomas/units"
     configv1 "github.com/openshift/api/config/v1"
+    "github.com/prometheus/common/model"
     v1 "k8s.io/api/core/v1"
     k8syaml "k8s.io/apimachinery/pkg/util/yaml"
     auditv1 "k8s.io/apiserver/pkg/apis/audit/v1"

@@ -126,6 +127,27 @@ func (c Config) GetThanosRulerAlertmanagerConfigs() []AdditionalAlertmanagerConf
     return alertmanagerConfigs
 }
 
+func (c Config) HasInvalidScrapeIntervalDuration() bool {
+    if c.ClusterMonitoringConfiguration == nil || c.UserWorkloadConfiguration == nil {
+        return false
+    }
+
+    if c.UserWorkloadConfiguration.Prometheus == nil || c.UserWorkloadConfiguration.Prometheus.ScrapeInterval == "" {
+        return false
+    }
+
+    scrapeInterval, err := model.ParseDuration(c.UserWorkloadConfiguration.Prometheus.ScrapeInterval)
+    if err != nil {
+        return true
+    }
+
+    allowedLowerLimit, _ := model.ParseDuration("5s")
+    allowedUpperLimit, _ := model.ParseDuration("5m")
+
+    return (scrapeInterval < allowedLowerLimit) || (scrapeInterval > allowedUpperLimit)
+}
+
 type Images struct {
     MetricsServer string
     PromLabelProxy string

@@ -436,6 +458,9 @@ func (c *Config) LoadEnforcedBodySizeLimit(pcr PodCapacityReader, ctx context.Co
 }
 
 func (c *Config) Precheck() error {
+    if c.HasInvalidScrapeIntervalDuration() {
+        return fmt.Errorf("%w: scrapeInterval specified should be between 5s and 5m", ErrUserWorkloadInvalidConfiguration)
+    }
     if c.ClusterMonitoringConfiguration.PrometheusK8sConfig.CollectionProfile != FullCollectionProfile && !c.CollectionProfilesFeatureGateEnabled {
         return fmt.Errorf("%w: collectionProfiles is currently a TechPreview feature behind the \"MetricsCollectionProfiles\" feature-gate, to be able to use a profile different from the default (\"full\") please enable it first", ErrConfigValidation)
     }
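
For intuition, `model.ParseDuration` from `github.com/prometheus/common` accepts Prometheus-style duration strings such as `15s` or `2h` and rejects garbage like `1234www`; the precheck simply parses the value and bounds it to [5s, 5m]. A standalone sketch of that logic (the `invalidScrapeInterval` helper name is ours, not the commit's):

```go
package main

import (
	"fmt"

	"github.com/prometheus/common/model"
)

// invalidScrapeInterval mirrors the bounds logic in the commit:
// unparsable values and values outside [5s, 5m] are invalid.
func invalidScrapeInterval(interval string) bool {
	d, err := model.ParseDuration(interval)
	if err != nil {
		return true // not a valid Prometheus duration string
	}
	lower, _ := model.ParseDuration("5s")
	upper, _ := model.ParseDuration("5m")
	return d < lower || d > upper
}

func main() {
	for _, in := range []string{"15s", "2s", "10m", "1234www"} {
		fmt.Printf("%-8s invalid=%v\n", in, invalidScrapeInterval(in))
	}
	// Output:
	// 15s      invalid=false
	// 2s       invalid=true
	// 10m      invalid=true
	// 1234www  invalid=true
}
```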

pkg/manifests/config_test.go (+55)

@@ -318,6 +318,61 @@ func TestLoadEnforcedBodySizeLimit(t *testing.T) {
     }
 }
 
+func TestScrapeIntervalUWMPreCheck(t *testing.T) {
+    for _, tc := range []struct {
+        name          string
+        uwmconfig     string
+        expectedError bool
+    }{
+        {
+            name:          "default",
+            uwmconfig:     "",
+            expectedError: false,
+        },
+        {
+            name: "scrapeInterval valid within limits",
+            uwmconfig: `prometheus:
+  scrapeInterval: 15s
+`,
+            expectedError: false,
+        },
+        {
+            name: "scrapeInterval < allowed lower limit",
+            uwmconfig: `prometheus:
+  scrapeInterval: 2s
+`,
+            expectedError: true,
+        },
+        {
+            name: "scrapeInterval > allowed upper limit",
+            uwmconfig: `prometheus:
+  scrapeInterval: 10m
+`,
+            expectedError: true,
+        },
+        {
+            name: "incorrect scrape interval value",
+            uwmconfig: `prometheus:
+  scrapeInterval: 1234www
+`,
+            expectedError: true,
+        },
+    } {
+        t.Run(tc.name, func(t *testing.T) {
+            c := NewDefaultConfig()
+            uwc, err := NewUserConfigFromString(tc.uwmconfig)
+            require.NoError(t, err)
+            c.UserWorkloadConfiguration = uwc
+            err = c.Precheck()
+            if err != nil && tc.expectedError {
+                return
+            }
+
+            require.NoError(t, err)
+        })
+    }
+}
+
 func TestCollectionProfilePreCheck(t *testing.T) {
     for _, tc := range []struct {
         name string

pkg/manifests/manifests.go (+6 −1)

@@ -329,7 +329,8 @@ var (
 )
 
 var (
-    ErrConfigValidation = fmt.Errorf("invalid value for config")
+    ErrConfigValidation                 = fmt.Errorf("invalid value for config")
+    ErrUserWorkloadInvalidConfiguration = fmt.Errorf("invalid value for user-workload config")
 )
 
 type Factory struct {

@@ -1658,6 +1659,10 @@ func (f *Factory) PrometheusUserWorkload(grpcTLS *v1.Secret) (*monv1.Prometheus,
     if err != nil {
         return nil, err
     }
+    if f.config.UserWorkloadConfiguration.Prometheus.ScrapeInterval != "" {
+        p.Spec.ScrapeInterval = monv1.Duration(f.config.UserWorkloadConfiguration.Prometheus.ScrapeInterval)
+    }
+
     if f.config.UserWorkloadConfiguration.Prometheus.LogLevel != "" {
         p.Spec.LogLevel = f.config.UserWorkloadConfiguration.Prometheus.LogLevel
     }
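
Once the factory applies the value, the generated UWM Prometheus custom resource carries it in its spec; abridged, the result would look roughly like:

```yaml
apiVersion: monitoring.coreos.com/v1
kind: Prometheus
metadata:
  name: user-workload
  namespace: openshift-user-workload-monitoring
spec:
  scrapeInterval: 15s
```

The e2e assertion added below polls exactly this object until `spec.scrapeInterval` matches.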

pkg/manifests/manifests_test.go (+5)

@@ -1643,6 +1643,7 @@ func TestPrometheusUserWorkloadConfiguration(t *testing.T) {
     c := NewDefaultConfig()
 
     uwc, err := NewUserConfigFromString(`prometheus:
+  scrapeInterval: 15s
   resources:
     requests:
       cpu: 100m

@@ -1670,6 +1671,10 @@ func TestPrometheusUserWorkloadConfiguration(t *testing.T) {
         t.Fatal(err)
     }
 
+    if p.Spec.ScrapeInterval != "15s" {
+        t.Fatal("Prometheus UWM scrapeInterval not configured correctly")
+    }
+
     if p.Spec.TopologySpreadConstraints[0].MaxSkew != 1 {
         t.Fatal("Prometheus UWM spread constraints MaxSkew not configured correctly")
     }

pkg/manifests/types.go (+3)

@@ -601,6 +601,9 @@ type AlertmanagerUserWorkloadConfig struct {
 // The `PrometheusRestrictedConfig` resource defines the settings for the
 // Prometheus component that monitors user-defined projects.
 type PrometheusRestrictedConfig struct {
+    // Configures the default interval between consecutive scrapes in case the ServiceMonitor or PodMonitor resource does not specify any value.
+    // The default value is `30s`.
+    ScrapeInterval string `json:"scrapeInterval,omitempty"`
     // Configures additional Alertmanager instances that receive alerts from
     // the Prometheus component. By default, no additional Alertmanager
     // instances are configured.

test/e2e/config_test.go (+31)

@@ -30,7 +30,9 @@ import (
     "github.com/openshift/cluster-monitoring-operator/test/e2e/framework"
     "github.com/stretchr/testify/require"
 
+    monv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
     v1 "k8s.io/api/core/v1"
+
     metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 )

@@ -585,6 +587,7 @@ func TestUserWorkloadMonitorPrometheusK8Config(t *testing.T) {
 
     uwmCM := f.BuildUserWorkloadConfigMap(t,
         fmt.Sprintf(`prometheus:
+  scrapeInterval: 15s
   enforcedTargetLimit: 10
   enforcedLabelLimit: 500
   enforcedLabelNameLengthLimit: 50

@@ -671,6 +674,10 @@ func TestUserWorkloadMonitorPrometheusK8Config(t *testing.T) {
             name:      "assert query log file value is set and correct",
             assertion: assertQueryLogValueEquals(f.UserWorkloadMonitoringNs, crName, "/tmp/test.log"),
         },
+        {
+            name:      "assert scrape interval is configured",
+            assertion: assertScrapeInterval("15s"),
+        },
     } {
         t.Run(tc.name, tc.assertion)
     }

@@ -1028,6 +1035,30 @@ func assertRemoteWriteWasSet(namespace, crName, urlValue string) func(t *testing
     }
 }
 
+func assertScrapeInterval(scrapeInterval string) func(*testing.T) {
+    ctx := context.Background()
+    return func(t *testing.T) {
+        err := framework.Poll(time.Second, 5*time.Minute, func() error {
+            p, err := f.MonitoringClient.Prometheuses(f.UserWorkloadMonitoringNs).Get(ctx, "user-workload", metav1.GetOptions{})
+            if err != nil {
+                return err
+            }
+
+            if p.Spec.ScrapeInterval == "" {
+                return errors.New("scrapeInterval is not set")
+            } else if p.Spec.ScrapeInterval != monv1.Duration(scrapeInterval) {
+                return fmt.Errorf("expected scrapeInterval to be %s, but got %s", scrapeInterval, p.Spec.ScrapeInterval)
+            }
+
+            return nil
+        })
+
+        if err != nil {
+            t.Fatalf("Timed out waiting for scrapeInterval configuration: %v", err)
+        }
+    }
+}
+
 func assertEnforcedTargetLimit(limit uint64) func(*testing.T) {
     ctx := context.Background()
     return func(t *testing.T) {