Skip to content

Commit b408a82

Browse files
committed
jsonnet: Support excluding namespaces from user-workload monitoring
This adds support for setting an `openshift.io/user-monitoring` label on namespaces to exclude them from user-workload monitoring. This is in addition to the exclusion of namespaces that have set a `true` value for the `openshift.io/cluster-monitoring` label.
1 parent f70f93a commit b408a82

File tree

6 files changed

+147
-1
lines changed

6 files changed

+147
-1
lines changed

assets/prometheus-user-workload/prometheus.yaml

+16
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,10 @@ spec:
132132
operator: NotIn
133133
values:
134134
- "true"
135+
- key: openshift.io/user-monitoring
136+
operator: NotIn
137+
values:
138+
- "false"
135139
podMonitorSelector: {}
136140
priorityClassName: openshift-user-critical
137141
probeNamespaceSelector:
@@ -140,6 +144,10 @@ spec:
140144
operator: NotIn
141145
values:
142146
- "true"
147+
- key: openshift.io/user-monitoring
148+
operator: NotIn
149+
values:
150+
- "false"
143151
probeSelector: {}
144152
replicas: 2
145153
resources:
@@ -152,6 +160,10 @@ spec:
152160
operator: NotIn
153161
values:
154162
- "true"
163+
- key: openshift.io/user-monitoring
164+
operator: NotIn
165+
values:
166+
- "false"
155167
ruleSelector:
156168
matchLabels:
157169
openshift.io/prometheus-rule-evaluation-scope: leaf-prometheus
@@ -169,6 +181,10 @@ spec:
169181
operator: NotIn
170182
values:
171183
- "true"
184+
- key: openshift.io/user-monitoring
185+
operator: NotIn
186+
values:
187+
- "false"
172188
serviceMonitorSelector: {}
173189
thanos:
174190
image: quay.io/thanos/thanos:v0.22.0

assets/thanos-ruler/thanos-ruler.yaml

+4
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,10 @@ spec:
8686
operator: NotIn
8787
values:
8888
- "true"
89+
- key: openshift.io/user-monitoring
90+
operator: NotIn
91+
values:
92+
- "false"
8993
ruleSelector:
9094
matchExpressions:
9195
- key: openshift.io/prometheus-rule-evaluation-scope

jsonnet/main.jsonnet

+5
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,11 @@ local commonConfig = {
4646
operator: 'NotIn',
4747
values: ['true'],
4848
},
49+
{
50+
key: 'openshift.io/user-monitoring',
51+
operator: 'NotIn',
52+
values: ['false'],
53+
},
4954
],
5055
},
5156
mixinNamespaceSelector: 'namespace=~"(openshift-.*|kube-.*|default)"',

test/e2e/framework/client.go

+44
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,27 @@ func (c *PrometheusClient) PrometheusQuery(query string) ([]byte, error) {
169169
return body, nil
170170
}
171171

172+
// PrometheusTargets runs an HTTP GET request against the Prometheus targets API and returns
173+
// the response body.
174+
func (c *PrometheusClient) PrometheusTargets() ([]byte, error) {
175+
resp, err := c.Do("GET", "/api/v1/targets", nil)
176+
if err != nil {
177+
return nil, err
178+
}
179+
defer resp.Body.Close()
180+
181+
body, err := ioutil.ReadAll(resp.Body)
182+
if err != nil {
183+
return nil, err
184+
}
185+
186+
if resp.StatusCode != http.StatusOK {
187+
return nil, fmt.Errorf("unexpected status code response, want %d, got %d (%q)", http.StatusOK, resp.StatusCode, ClampMax(body))
188+
}
189+
190+
return body, nil
191+
}
192+
172193
// PrometheusRules runs an HTTP GET request against the Prometheus rules API and returns
173194
// the response body.
174195
func (c *PrometheusClient) PrometheusRules() ([]byte, error) {
@@ -348,3 +369,26 @@ func (c *PrometheusClient) WaitForRulesReturn(t *testing.T, timeout time.Duratio
348369
t.Fatal(err)
349370
}
350371
}
372+
373+
// WaitForTargetsReturn waits for Prometheus targets for a given time interval
374+
// and returns successfully if the validate function doesn't return an error.
375+
func (c *PrometheusClient) WaitForTargetsReturn(t *testing.T, timeout time.Duration, validate func([]byte) error) {
376+
t.Helper()
377+
378+
err := Poll(5*time.Second, timeout, func() error {
379+
body, err := c.PrometheusTargets()
380+
if err != nil {
381+
return errors.Wrap(err, "error getting targets")
382+
}
383+
384+
if err := validate(body); err != nil {
385+
return errors.Wrapf(err, "error validating response body %q", string(body))
386+
}
387+
388+
return nil
389+
})
390+
391+
if err != nil {
392+
t.Fatal(err)
393+
}
394+
}

test/e2e/user_workload_monitoring_test.go

+56-1
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,6 @@ const (
4646
userWorkloadTestNs = "user-workload-test"
4747
)
4848

49-
5049
type scenario struct {
5150
name string
5251
assertion func(*testing.T)
@@ -106,6 +105,7 @@ func TestUserWorkloadMonitoringWithStorage(t *testing.T) {
106105
{"assert prometheus and alertmanager is not deployed in user namespace", assertPrometheusAlertmanagerInUserNamespace},
107106
{"assert grpc tls rotation", assertGRPCTLSRotation},
108107
{"assert enforced target limit is configured", assertEnforcedTargetLimit(10)},
108+
{"assert namespace opt out removes appropriate targets", assertNamespaceOptOut},
109109
{"enable user workload monitoring, assert prometheus rollout", createUserWorkloadAssets(cm)},
110110
{"set VolumeClaimTemplate for prometheus CR, assert that it is created", assertVolumeClaimsConfigAndRollout(rolloutParams{
111111
namespace: f.UserWorkloadMonitoringNs,
@@ -1181,6 +1181,61 @@ func assertEnforcedTargetLimit(limit uint64) func(*testing.T) {
11811181
}
11821182
}
11831183

1184+
func assertNamespaceOptOut(t *testing.T) {
1185+
ctx := context.Background()
1186+
1187+
serviceMonitorJobName := "serviceMonitor/user-workload-test/prometheus-example-monitor/0"
1188+
1189+
// Ensure the target for the example ServiceMonitor exists.
1190+
f.ThanosQuerierClient.WaitForTargetsReturn(t, 5*time.Minute, func(body []byte) error {
1191+
return getActiveTarget(body, serviceMonitorJobName)
1192+
})
1193+
1194+
// Add opt-out label to namespace.
1195+
ns, err := f.KubeClient.CoreV1().Namespaces().Get(ctx, userWorkloadTestNs, metav1.GetOptions{})
1196+
if err != nil {
1197+
t.Fatalf("Failed to fetch user-workload namespace: %v", err)
1198+
}
1199+
1200+
labels := ns.GetLabels()
1201+
labels["openshift.io/user-monitoring"] = "false"
1202+
ns.SetLabels(labels)
1203+
1204+
_, err = f.KubeClient.CoreV1().Namespaces().Update(ctx, ns, metav1.UpdateOptions{})
1205+
if err != nil {
1206+
t.Fatalf("Failed to apply user-monitoring opt-out label: %v", err)
1207+
}
1208+
1209+
// Ensure the target for the example ServiceMonitor is removed.
1210+
f.ThanosQuerierClient.WaitForTargetsReturn(t, 5*time.Minute, func(body []byte) error {
1211+
if err := getActiveTarget(body, serviceMonitorJobName); err == nil {
1212+
return fmt.Errorf("target '%s' exists, but should not", serviceMonitorJobName)
1213+
}
1214+
1215+
return nil
1216+
})
1217+
1218+
// Remove opt-out label from namespace.
1219+
ns, err = f.KubeClient.CoreV1().Namespaces().Get(ctx, userWorkloadTestNs, metav1.GetOptions{})
1220+
if err != nil {
1221+
t.Fatalf("Failed to fetch user-workload namespace: %v", err)
1222+
}
1223+
1224+
labels = ns.GetLabels()
1225+
delete(labels, "openshift.io/user-monitoring")
1226+
ns.SetLabels(labels)
1227+
1228+
_, err = f.KubeClient.CoreV1().Namespaces().Update(ctx, ns, metav1.UpdateOptions{})
1229+
if err != nil {
1230+
t.Fatalf("Failed to remove user-monitoring opt-out label: %v", err)
1231+
}
1232+
1233+
// Ensure the target for the example ServiceMonitor is recreated.
1234+
f.ThanosQuerierClient.WaitForTargetsReturn(t, 5*time.Minute, func(body []byte) error {
1235+
return getActiveTarget(body, serviceMonitorJobName)
1236+
})
1237+
}
1238+
11841239
func updateConfigmap(cm *v1.ConfigMap) func(t *testing.T) {
11851240
ctx := context.Background()
11861241
return func(t *testing.T) {

test/e2e/utils.go

+22
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,28 @@ import (
2020
"github.com/Jeffail/gabs"
2121
)
2222

23+
func getActiveTarget(body []byte, jobName string) error {
24+
j, err := gabs.ParseJSON([]byte(body))
25+
if err != nil {
26+
return err
27+
}
28+
29+
activeJobs, err := j.Path("data.activeTargets").Children()
30+
if err != nil {
31+
return err
32+
}
33+
34+
for _, job := range activeJobs {
35+
name := job.S("discoveredLabels").S("job").Data().(string)
36+
37+
if name == jobName {
38+
return nil
39+
}
40+
}
41+
42+
return fmt.Errorf("job name '%s' not found in active targets", jobName)
43+
}
44+
2345
func getThanosRules(body []byte, expGroupName, expRuleName string) error {
2446
j, err := gabs.ParseJSON([]byte(body))
2547
if err != nil {

0 commit comments

Comments
 (0)