Skip to content

Commit e28417b

Browse files
authored
Decommission prow-monitoring (openshift#34851)
1 parent d2b5526 commit e28417b

File tree

135 files changed

+29
-17850
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

135 files changed

+29
-17850
lines changed

Makefile

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -267,7 +267,6 @@ verify-app-ci:
267267

268268
mixins:
269269
$(SKIP_PULL) || $(CONTAINER_ENGINE) pull registry.ci.openshift.org/ci/dashboards-validation:latest
270-
$(CONTAINER_ENGINE) run $(USER) --platform linux/amd64 --user=$(UID) --rm -v "$(CURDIR):/release:z" registry.ci.openshift.org/ci/dashboards-validation:latest make -C /release/clusters/app.ci/prow-monitoring/mixins install all
271270
$(CONTAINER_ENGINE) run $(USER) --platform linux/amd64 --user=$(UID) --rm -v "$(CURDIR):/release:z" registry.ci.openshift.org/ci/dashboards-validation:latest make -C /release/clusters/app.ci/openshift-user-workload-monitoring/mixins install all
272271
.PHONY: mixins
273272

ci-operator/jobs/infra-periodics.yaml

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1436,16 +1436,6 @@ periodics:
14361436
command:
14371437
- applyconfig
14381438
env:
1439-
- name: OPENSHIFT_MONITORING_CI_TOKEN_ON_HIVE
1440-
valueFrom:
1441-
secretKeyRef:
1442-
key: hive-openshift-monitoring-ci-token
1443-
name: hive-openshift-monitoring-credentials
1444-
- name: OPENSHIFT_PROMETHEUS_PASSWORD
1445-
valueFrom:
1446-
secretKeyRef:
1447-
key: prometheus-k8s-basic-auth-password
1448-
name: app-ci-openshift-monitoring-credentials
14491439
- name: SLACK_API_URL
14501440
valueFrom:
14511441
secretKeyRef:
@@ -1456,11 +1446,6 @@ periodics:
14561446
secretKeyRef:
14571447
key: integration_key
14581448
name: pagerduty
1459-
- name: PROMETHEUS_USER_WORKLOAD_TOKEN
1460-
valueFrom:
1461-
secretKeyRef:
1462-
key: sa.prometheus-user-workload.app.ci.token.txt
1463-
name: app-ci-openshift-user-workload-monitoring-credentials
14641449
image: applyconfig:latest
14651450
imagePullPolicy: Always
14661451
name: ""

ci-operator/jobs/openshift/release/openshift-release-master-postsubmits.yaml

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -18,26 +18,11 @@ postsubmits:
1818
command:
1919
- applyconfig
2020
env:
21-
- name: OPENSHIFT_MONITORING_CI_TOKEN_ON_HIVE
22-
valueFrom:
23-
secretKeyRef:
24-
key: hive-openshift-monitoring-ci-token
25-
name: hive-openshift-monitoring-credentials
26-
- name: OPENSHIFT_PROMETHEUS_PASSWORD
27-
valueFrom:
28-
secretKeyRef:
29-
key: prometheus-k8s-basic-auth-password
30-
name: app-ci-openshift-monitoring-credentials
3121
- name: PAGERDUTY_INTEGRATION_KEY
3222
valueFrom:
3323
secretKeyRef:
3424
key: integration_key
3525
name: pagerduty
36-
- name: PROMETHEUS_USER_WORKLOAD_TOKEN
37-
valueFrom:
38-
secretKeyRef:
39-
key: sa.prometheus-user-workload.app.ci.token.txt
40-
name: app-ci-openshift-user-workload-monitoring-credentials
4126
- name: SLACK_API_URL
4227
valueFrom:
4328
secretKeyRef:

ci-operator/jobs/openshift/release/openshift-release-master-presubmits.yaml

Lines changed: 1 addition & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -99,26 +99,11 @@ presubmits:
9999
env:
100100
- name: HOME
101101
value: /tmp
102-
- name: OPENSHIFT_MONITORING_CI_TOKEN_ON_HIVE
103-
valueFrom:
104-
secretKeyRef:
105-
key: hive-openshift-monitoring-ci-token
106-
name: hive-openshift-monitoring-credentials
107-
- name: OPENSHIFT_PROMETHEUS_PASSWORD
108-
valueFrom:
109-
secretKeyRef:
110-
key: prometheus-k8s-basic-auth-password
111-
name: app-ci-openshift-monitoring-credentials
112102
- name: PAGERDUTY_INTEGRATION_KEY
113103
valueFrom:
114104
secretKeyRef:
115105
key: integration_key
116106
name: pagerduty
117-
- name: PROMETHEUS_USER_WORKLOAD_TOKEN
118-
valueFrom:
119-
secretKeyRef:
120-
key: sa.prometheus-user-workload.app.ci.token.txt
121-
name: app-ci-openshift-user-workload-monitoring-credentials
122107
- name: SLACK_API_URL
123108
valueFrom:
124109
secretKeyRef:
@@ -847,7 +832,7 @@ presubmits:
847832
pj-rehearse.openshift.io/can-be-rehearsed: "true"
848833
name: pull-ci-openshift-release-master-generated-dashboards
849834
rerun_command: /test generated-dashboards
850-
run_if_changed: ^(clusters/app.ci/prow-monitoring/.*)|^(clusters/app.ci/openshift-user-workload-monitoring/.*)
835+
run_if_changed: ^(clusters/app.ci/openshift-user-workload-monitoring/.*)
851836
spec:
852837
containers:
853838
- args:

ci-operator/platform-balance/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# Rebalancing tests among platforms
22

3-
If test volume for a given platform exceeds [the Boskos lease capacity][boskos-leases], [`jobs-failing-with-lease-acquire-timeout`](../../clusters/app.ci/prow-monitoring/mixins/prometheus_out/prometheus-prow-rules_prometheusrule.yaml) will fire.
3+
If test volume for a given platform exceeds [the Boskos lease capacity][boskos-leases], [`jobs-failing-with-lease-acquire-timeout`](../../clusters/app.ci/openshift-user-workload-monitoring/mixins/prometheus_out/ci-alerts_prometheusrule.yaml) will fire.
44
Presubmit jobs may be rebalanced to move platform-agnostic jobs to platforms with available capacity.
55
Component teams may mark their presubmit jobs as platform-agnostic by configuring `as` names which exclude the platform slug (e.g. `aws`), whose absence is used as a marker of "this test is platform-agnostic".
66
For example, see [release#10152][release-10152].

clusters/app.ci/assets/dptp-controller-manager.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ metadata:
6262
name: dptp-controller-manager
6363
namespace: ci
6464
spec:
65-
# By default up{job="prow-monitoring/dptp-controller-manager"}.
65+
# By default up{job="*/dptp-controller-manager"}.
6666
# We want up{job="dptp-controller-manager"} instead.
6767
jobLabel: app
6868
selector:

clusters/app.ci/prow-monitoring/build_cop.md renamed to clusters/app.ci/openshift-user-workload-monitoring/build_cop.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
# Build Cop Dashboard in prow-monitoring
1+
# Build Cop Dashboard in ci-monitoring
22

3-
The [build-cop dashboard](https://grafana-prow-monitoring.apps.ci.l2s4.p1.openshiftapps.com/d/6829209d59479d48073d09725ce807fa/build-cop-dashboard?orgId=1) in [prow-monitoring](README.md) is an alternative tool which shows the success rate for various types of Prow jobs in Build Cop reports. The data presented by these dashboards are derived from Prow's state and persist for a month. Every authenticated user of our CI cluster has access to the dashboard.
3+
The [build-cop dashboard](https://grafana-route-ci-grafana.apps.ci.l2s4.p1.openshiftapps.com/d/6829209d59479d48073d09725ce807fa/build-cop-dashboard?orgId=1) in [ci-monitoring](README.md) is an alternative tool which shows the success rate for various types of Prow jobs in Build Cop reports. The data presented by these dashboards are derived from Prow's state and persist for a month. Every authenticated user of our CI cluster has access to the dashboard.
44

55
The Build Cop must keep track of passing rates for a number of job types. Normally, this would be done by viewing a filtered list of jobs in Deck. E.g., [the deck page](https://prow.ci.openshift.org/?job=*-master-e2e-aws) shows `Success rate over time: 3h: 78%, 12h: 81%, 48h: 77%` for job with name `*-master-e2e-aws`. With the dashboard, an overview of all job types can be seen with one panel.
66

clusters/app.ci/openshift-user-workload-monitoring/mixins/_prometheus/dptp_alerts.libsonnet

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@
3737
severity: 'critical',
3838
},
3939
annotations: {
40-
message: 'Infrastructure CI job {{ $labels.job_name }} is failing. Investigate the symptoms, assess the urgency and take appropriate action (<https://grafana-prow-monitoring.apps.ci.l2s4.p1.openshiftapps.com/d/%s/dptp-dashboard?orgId=1&fullscreen&viewPanel=4|Grafana Dashboard> | <https://prow.ci.openshift.org/?job={{ $labels.job_name }}|Deck> | <https://github.com/openshift/release/blob/master/docs/dptp-triage-sop/infrastructure-jobs.md#{{ $labels.job_name}}|SOP>).' % $._config.grafanaDashboardIDs['dptp.json'],
40+
message: 'Infrastructure CI job {{ $labels.job_name }} is failing. Investigate the symptoms, assess the urgency and take appropriate action (<https://grafana-route-ci-grafana.apps.ci.l2s4.p1.openshiftapps.com/d/%s/dptp-dashboard?orgId=1&fullscreen&viewPanel=4|Grafana Dashboard> | <https://prow.ci.openshift.org/?job={{ $labels.job_name }}|Deck> | <https://github.com/openshift/release/blob/master/docs/dptp-triage-sop/infrastructure-jobs.md#{{ $labels.job_name}}|SOP>).' % $._config.grafanaDashboardIDs['dptp.json'],
4141
},
4242
},
4343
{

clusters/app.ci/openshift-user-workload-monitoring/mixins/_prometheus/ghproxy_alerts.libsonnet

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
severity: 'critical',
1414
},
1515
annotations: {
16-
message: 'The average size of the pending GH API request queue in ghproxy is {{ $value | humanize }} over the last 5 minutes, which can indicate insufficient proxy throughput. Inspect <https://prometheus-prow-monitoring.apps.ci.l2s4.p1.openshiftapps.com/graph?g0.range_input=1h&g0.end_input=2022-03-23%2016%3A22&g0.expr=sum_over_time(pending_outbound_requests%7Bcontainer%3D%22ghproxy%22%7D%5B5m%5D)%20%2F%20count_over_time(pending_outbound_requests%7Bcontainer%3D%22ghproxy%22%7D%5B5m%5D)%20%3E%20100&g0.tab=0|Prometheus> and if the metric is ramping up, consider whether changing ghproxy throttling parameters may be necessary',
16+
message: 'The average size of the pending GH API request queue in ghproxy is {{ $value | humanize }} over the last 5 minutes, which can indicate insufficient proxy throughput. Inspect <https://console-openshift-console.apps.ci.l2s4.p1.openshiftapps.com/monitoring/alertrules?alerting-rule-name=ghproxy-too-many-pending-alerts|Prometheus> and if the metric is ramping up, consider whether changing ghproxy throttling parameters may be necessary',
1717
},
1818
},
1919
{
@@ -25,7 +25,7 @@
2525
severity: 'warning',
2626
},
2727
annotations: {
28-
message: '{{ $value | humanize }}%% of all requests for {{ $labels.path }} through the GitHub proxy are errorring with code {{ $labels.status }}. Check <https://grafana-prow-monitoring.apps.ci.l2s4.p1.openshiftapps.com/d/%s/github-cache?orgId=1&refresh=1m&fullscreen&viewPanel=9|grafana>' % $._config.grafanaDashboardIDs['ghproxy.json'],
28+
message: '{{ $value | humanize }}%% of all requests for {{ $labels.path }} through the GitHub proxy are erroring with code {{ $labels.status }}. Check <https://grafana-route-ci-grafana.apps.ci.l2s4.p1.openshiftapps.com/d/%s/github-cache?orgId=1&refresh=1m&fullscreen&viewPanel=9|grafana>' % $._config.grafanaDashboardIDs['ghproxy.json'],
2929
},
3030
},
3131
{
@@ -37,7 +37,7 @@
3737
severity: 'warning',
3838
},
3939
annotations: {
40-
message: '{{ $value | humanize }}%% of all API requests through the GitHub proxy are errorring with code {{ $labels.status }}. Check <https://grafana-prow-monitoring.apps.ci.l2s4.p1.openshiftapps.com/d/%s/github-cache?orgId=1&fullscreen&viewPanel=8|grafana>' % $._config.grafanaDashboardIDs['ghproxy.json'],
40+
message: '{{ $value | humanize }}%% of all API requests through the GitHub proxy are erroring with code {{ $labels.status }}. Check <https://grafana-route-ci-grafana.apps.ci.l2s4.p1.openshiftapps.com/d/%s/github-cache?orgId=1&fullscreen&viewPanel=8|grafana>' % $._config.grafanaDashboardIDs['ghproxy.json'],
4141
},
4242
},
4343
{
@@ -50,7 +50,7 @@
5050
severity: 'critical',
5151
},
5252
annotations: {
53-
message: '{{ $labels.login }} may run out of API quota before the next reset. Check the <https://grafana-prow-monitoring.apps.ci.l2s4.p1.openshiftapps.com/d/d72fe8d0400b2912e319b1e95d0ab1b3/github-cache?orgId=1|dashboard>',
53+
message: '{{ $labels.login }} may run out of API quota before the next reset. Check the <https://grafana-route-ci-grafana.apps.ci.l2s4.p1.openshiftapps.com/d/d72fe8d0400b2912e319b1e95d0ab1b3/github-cache?orgId=1|dashboard>',
5454
},
5555
},
5656
{
@@ -63,7 +63,7 @@
6363
severity: 'critical',
6464
},
6565
annotations: {
66-
message: '{{ $labels.token_hash }} may run out of API quota before the next reset. Check the <https://grafana-prow-monitoring.apps.ci.l2s4.p1.openshiftapps.com/d/d72fe8d0400b2912e319b1e95d0ab1b3/github-cache?orgId=1|dashboard>',
66+
message: '{{ $labels.token_hash }} may run out of API quota before the next reset. Check the <https://grafana-route-ci-grafana.apps.ci.l2s4.p1.openshiftapps.com/d/d72fe8d0400b2912e319b1e95d0ab1b3/github-cache?orgId=1|dashboard>',
6767
},
6868
},
6969
{
@@ -77,7 +77,7 @@
7777
},
7878
annotations: {
7979
message: |||
80-
{{ $labels.token_hash }} uses 90% of the available inode (<https://grafana-prow-monitoring.apps.ci.l2s4.p1.openshiftapps.com/d/d72fe8d0400b2912e319b1e95d0ab1b3/github-cache?viewPanel=5&orgId=1|dashboard>)
80+
{{ $labels.token_hash }} uses 90% of the available inode (<https://grafana-route-ci-grafana.apps.ci.l2s4.p1.openshiftapps.com/d/d72fe8d0400b2912e319b1e95d0ab1b3/github-cache?viewPanel=5&orgId=1|dashboard>)
8181
8282
Resolve by pruning the cache inside the ghproxy pod:
8383

clusters/app.ci/openshift-user-workload-monitoring/mixins/_prometheus/prow_alerts.libsonnet

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
severity: 'critical',
2424
},
2525
annotations: {
26-
message: 'The backlog for {{ $labels.name }} is not getting drained. Check <https://prometheus-prow-monitoring.apps.ci.l2s4.p1.openshiftapps.com/graph?g0.range_input=1h&g0.expr=workqueue_depth%7Bname%3D~%22{{ $labels.name }}%22%7D%20%3E%20100&g0.tab=0|Prometheus>'
26+
message: 'The backlog for {{ $labels.name }} is not getting drained. Check <https://console-openshift-console.apps.ci.l2s4.p1.openshiftapps.com/monitoring/alertrules?alerting-rule-name=prow-job-backlog-growing|Prometheus>'
2727
},
2828
},
2929
{

0 commit comments

Comments
 (0)