diff --git a/doc/admin/observability/alerts.md b/doc/admin/observability/alerts.md
index 7ec8c026fe1c..9b20d5f96309 100644
--- a/doc/admin/observability/alerts.md
+++ b/doc/admin/observability/alerts.md
@@ -5931,7 +5931,7 @@ with your code hosts connections or networking issues affecting communication wi
Technical details
-Custom alert query: `last_over_time(sum(increase(src_executor_processor_errors_total{queue=~"${queue:regex}",job=~"^sourcegraph-executors.*"}[5m]))[5h:]) / (last_over_time(sum(increase(src_executor_processor_total{queue=~"${queue:regex}",job=~"^sourcegraph-executors.*"}[5m]))[5h:]) + last_over_time(sum(increase(src_executor_processor_errors_total{queue=~"${queue:regex}",job=~"^sourcegraph-executors.*"}[5m]))[5h:])) * 100`
+Custom alert query: `last_over_time(sum(increase(src_executor_processor_errors_total{queue=~"${queue:regex}",sg_job=~"^sourcegraph-executors.*"}[5m]))[5h:]) / (last_over_time(sum(increase(src_executor_processor_total{queue=~"${queue:regex}",sg_job=~"^sourcegraph-executors.*"}[5m]))[5h:]) + last_over_time(sum(increase(src_executor_processor_errors_total{queue=~"${queue:regex}",sg_job=~"^sourcegraph-executors.*"}[5m]))[5h:])) * 100`
diff --git a/doc/admin/observability/dashboards.md b/doc/admin/observability/dashboards.md
index 059cb73ef4a5..de457329debe 100644
--- a/doc/admin/observability/dashboards.md
+++ b/doc/admin/observability/dashboards.md
@@ -17470,7 +17470,7 @@ To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100100`
Technical details
-Query: `sum(src_executor_processor_handlers{queue=~"${queue:regex}",job=~"^sourcegraph-executors.*"})`
+Query: `sum(src_executor_processor_handlers{queue=~"${queue:regex}",sg_job=~"^sourcegraph-executors.*"})`
@@ -17489,7 +17489,7 @@ To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100110`
Technical details
-Query: `sum(increase(src_executor_processor_total{queue=~"${queue:regex}",job=~"^sourcegraph-executors.*"}[5m]))`
+Query: `sum(increase(src_executor_processor_total{queue=~"${queue:regex}",sg_job=~"^sourcegraph-executors.*"}[5m]))`
@@ -17508,7 +17508,7 @@ To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100111`
Technical details
-Query: `sum by (le)(rate(src_executor_processor_duration_seconds_bucket{queue=~"${queue:regex}",job=~"^sourcegraph-executors.*"}[5m]))`
+Query: `sum by (le)(rate(src_executor_processor_duration_seconds_bucket{queue=~"${queue:regex}",sg_job=~"^sourcegraph-executors.*"}[5m]))`
@@ -17527,7 +17527,7 @@ To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100112`
Technical details
-Query: `sum(increase(src_executor_processor_errors_total{queue=~"${queue:regex}",job=~"^sourcegraph-executors.*"}[5m]))`
+Query: `sum(increase(src_executor_processor_errors_total{queue=~"${queue:regex}",sg_job=~"^sourcegraph-executors.*"}[5m]))`
@@ -17546,7 +17546,7 @@ To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100113`
Technical details
-Query: `sum(increase(src_executor_processor_errors_total{queue=~"${queue:regex}",job=~"^sourcegraph-executors.*"}[5m])) / (sum(increase(src_executor_processor_total{queue=~"${queue:regex}",job=~"^sourcegraph-executors.*"}[5m])) + sum(increase(src_executor_processor_errors_total{queue=~"${queue:regex}",job=~"^sourcegraph-executors.*"}[5m]))) * 100`
+Query: `sum(increase(src_executor_processor_errors_total{queue=~"${queue:regex}",sg_job=~"^sourcegraph-executors.*"}[5m])) / (sum(increase(src_executor_processor_total{queue=~"${queue:regex}",sg_job=~"^sourcegraph-executors.*"}[5m])) + sum(increase(src_executor_processor_errors_total{queue=~"${queue:regex}",sg_job=~"^sourcegraph-executors.*"}[5m]))) * 100`
@@ -17569,7 +17569,7 @@ To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100200`
Technical details
-Query: `sum(increase(src_executor_run_lock_wait_total{sg_jobs=~"^sourcegraph-executors.*"}[5m]))`
+Query: `sum(increase(src_executor_run_lock_wait_total{sg_job=~"^sourcegraph-executors.*"}[5m]))`
@@ -17590,7 +17590,7 @@ To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100201`
Technical details
-Query: `sum(increase(src_executor_run_lock_held_total{sg_jobs=~"^sourcegraph-executors.*"}[5m]))`
+Query: `sum(increase(src_executor_run_lock_held_total{sg_job=~"^sourcegraph-executors.*"}[5m]))`
@@ -17611,7 +17611,7 @@ To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100300`
Technical details
-Query: `sum(increase(src_apiworker_apiclient_total{job=~"^sourcegraph-executors.*"}[5m]))`
+Query: `sum(increase(src_apiworker_apiclient_total{sg_job=~"^sourcegraph-executors.*"}[5m]))`
@@ -17630,7 +17630,7 @@ To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100301`
Technical details
-Query: `sum by (le)(rate(src_apiworker_apiclient_duration_seconds_bucket{job=~"^sourcegraph-executors.*"}[5m]))`
+Query: `sum by (le)(rate(src_apiworker_apiclient_duration_seconds_bucket{sg_job=~"^sourcegraph-executors.*"}[5m]))`
@@ -17649,7 +17649,7 @@ To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100302`
Technical details
-Query: `sum(increase(src_apiworker_apiclient_errors_total{job=~"^sourcegraph-executors.*"}[5m]))`
+Query: `sum(increase(src_apiworker_apiclient_errors_total{sg_job=~"^sourcegraph-executors.*"}[5m]))`
@@ -17668,7 +17668,7 @@ To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100303`
Technical details
-Query: `sum(increase(src_apiworker_apiclient_errors_total{job=~"^sourcegraph-executors.*"}[5m])) / (sum(increase(src_apiworker_apiclient_total{job=~"^sourcegraph-executors.*"}[5m])) + sum(increase(src_apiworker_apiclient_errors_total{job=~"^sourcegraph-executors.*"}[5m]))) * 100`
+Query: `sum(increase(src_apiworker_apiclient_errors_total{sg_job=~"^sourcegraph-executors.*"}[5m])) / (sum(increase(src_apiworker_apiclient_total{sg_job=~"^sourcegraph-executors.*"}[5m])) + sum(increase(src_apiworker_apiclient_errors_total{sg_job=~"^sourcegraph-executors.*"}[5m]))) * 100`
@@ -17687,7 +17687,7 @@ To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100310`
Technical details
-Query: `sum by (op)(increase(src_apiworker_apiclient_total{job=~"^sourcegraph-executors.*"}[5m]))`
+Query: `sum by (op)(increase(src_apiworker_apiclient_total{sg_job=~"^sourcegraph-executors.*"}[5m]))`
@@ -17706,7 +17706,7 @@ To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100311`
Technical details
-Query: `histogram_quantile(0.99, sum by (le,op)(rate(src_apiworker_apiclient_duration_seconds_bucket{job=~"^sourcegraph-executors.*"}[5m])))`
+Query: `histogram_quantile(0.99, sum by (le,op)(rate(src_apiworker_apiclient_duration_seconds_bucket{sg_job=~"^sourcegraph-executors.*"}[5m])))`
@@ -17725,7 +17725,7 @@ To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100312`
Technical details
-Query: `sum by (op)(increase(src_apiworker_apiclient_errors_total{job=~"^sourcegraph-executors.*"}[5m]))`
+Query: `sum by (op)(increase(src_apiworker_apiclient_errors_total{sg_job=~"^sourcegraph-executors.*"}[5m]))`
@@ -17744,7 +17744,7 @@ To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100313`
Technical details
-Query: `sum by (op)(increase(src_apiworker_apiclient_errors_total{job=~"^sourcegraph-executors.*"}[5m])) / (sum by (op)(increase(src_apiworker_apiclient_total{job=~"^sourcegraph-executors.*"}[5m])) + sum by (op)(increase(src_apiworker_apiclient_errors_total{job=~"^sourcegraph-executors.*"}[5m]))) * 100`
+Query: `sum by (op)(increase(src_apiworker_apiclient_errors_total{sg_job=~"^sourcegraph-executors.*"}[5m])) / (sum by (op)(increase(src_apiworker_apiclient_total{sg_job=~"^sourcegraph-executors.*"}[5m])) + sum by (op)(increase(src_apiworker_apiclient_errors_total{sg_job=~"^sourcegraph-executors.*"}[5m]))) * 100`
@@ -17765,7 +17765,7 @@ To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100400`
Technical details
-Query: `sum(increase(src_apiworker_command_total{op=~"setup.*",job=~"^sourcegraph-executors.*"}[5m]))`
+Query: `sum(increase(src_apiworker_command_total{op=~"setup.*",sg_job=~"^sourcegraph-executors.*"}[5m]))`
@@ -17784,7 +17784,7 @@ To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100401`
Technical details
-Query: `sum by (le)(rate(src_apiworker_command_duration_seconds_bucket{op=~"setup.*",job=~"^sourcegraph-executors.*"}[5m]))`
+Query: `sum by (le)(rate(src_apiworker_command_duration_seconds_bucket{op=~"setup.*",sg_job=~"^sourcegraph-executors.*"}[5m]))`
@@ -17803,7 +17803,7 @@ To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100402`
Technical details
-Query: `sum(increase(src_apiworker_command_errors_total{op=~"setup.*",job=~"^sourcegraph-executors.*"}[5m]))`
+Query: `sum(increase(src_apiworker_command_errors_total{op=~"setup.*",sg_job=~"^sourcegraph-executors.*"}[5m]))`
@@ -17822,7 +17822,7 @@ To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100403`
Technical details
-Query: `sum(increase(src_apiworker_command_errors_total{op=~"setup.*",job=~"^sourcegraph-executors.*"}[5m])) / (sum(increase(src_apiworker_command_total{op=~"setup.*",job=~"^sourcegraph-executors.*"}[5m])) + sum(increase(src_apiworker_command_errors_total{op=~"setup.*",job=~"^sourcegraph-executors.*"}[5m]))) * 100`
+Query: `sum(increase(src_apiworker_command_errors_total{op=~"setup.*",sg_job=~"^sourcegraph-executors.*"}[5m])) / (sum(increase(src_apiworker_command_total{op=~"setup.*",sg_job=~"^sourcegraph-executors.*"}[5m])) + sum(increase(src_apiworker_command_errors_total{op=~"setup.*",sg_job=~"^sourcegraph-executors.*"}[5m]))) * 100`
@@ -17841,7 +17841,7 @@ To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100410`
Technical details
-Query: `sum by (op)(increase(src_apiworker_command_total{op=~"setup.*",job=~"^sourcegraph-executors.*"}[5m]))`
+Query: `sum by (op)(increase(src_apiworker_command_total{op=~"setup.*",sg_job=~"^sourcegraph-executors.*"}[5m]))`
@@ -17860,7 +17860,7 @@ To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100411`
Technical details
-Query: `histogram_quantile(0.99, sum by (le,op)(rate(src_apiworker_command_duration_seconds_bucket{op=~"setup.*",job=~"^sourcegraph-executors.*"}[5m])))`
+Query: `histogram_quantile(0.99, sum by (le,op)(rate(src_apiworker_command_duration_seconds_bucket{op=~"setup.*",sg_job=~"^sourcegraph-executors.*"}[5m])))`
@@ -17879,7 +17879,7 @@ To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100412`
Technical details
-Query: `sum by (op)(increase(src_apiworker_command_errors_total{op=~"setup.*",job=~"^sourcegraph-executors.*"}[5m]))`
+Query: `sum by (op)(increase(src_apiworker_command_errors_total{op=~"setup.*",sg_job=~"^sourcegraph-executors.*"}[5m]))`
@@ -17898,7 +17898,7 @@ To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100413`
Technical details
-Query: `sum by (op)(increase(src_apiworker_command_errors_total{op=~"setup.*",job=~"^sourcegraph-executors.*"}[5m])) / (sum by (op)(increase(src_apiworker_command_total{op=~"setup.*",job=~"^sourcegraph-executors.*"}[5m])) + sum by (op)(increase(src_apiworker_command_errors_total{op=~"setup.*",job=~"^sourcegraph-executors.*"}[5m]))) * 100`
+Query: `sum by (op)(increase(src_apiworker_command_errors_total{op=~"setup.*",sg_job=~"^sourcegraph-executors.*"}[5m])) / (sum by (op)(increase(src_apiworker_command_total{op=~"setup.*",sg_job=~"^sourcegraph-executors.*"}[5m])) + sum by (op)(increase(src_apiworker_command_errors_total{op=~"setup.*",sg_job=~"^sourcegraph-executors.*"}[5m]))) * 100`
@@ -17919,7 +17919,7 @@ To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100500`
Technical details
-Query: `sum(increase(src_apiworker_command_total{op=~"exec.*",job=~"^sourcegraph-executors.*"}[5m]))`
+Query: `sum(increase(src_apiworker_command_total{op=~"exec.*",sg_job=~"^sourcegraph-executors.*"}[5m]))`
@@ -17938,7 +17938,7 @@ To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100501`
Technical details
-Query: `sum by (le)(rate(src_apiworker_command_duration_seconds_bucket{op=~"exec.*",job=~"^sourcegraph-executors.*"}[5m]))`
+Query: `sum by (le)(rate(src_apiworker_command_duration_seconds_bucket{op=~"exec.*",sg_job=~"^sourcegraph-executors.*"}[5m]))`
@@ -17957,7 +17957,7 @@ To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100502`
Technical details
-Query: `sum(increase(src_apiworker_command_errors_total{op=~"exec.*",job=~"^sourcegraph-executors.*"}[5m]))`
+Query: `sum(increase(src_apiworker_command_errors_total{op=~"exec.*",sg_job=~"^sourcegraph-executors.*"}[5m]))`
@@ -17976,7 +17976,7 @@ To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100503`
Technical details
-Query: `sum(increase(src_apiworker_command_errors_total{op=~"exec.*",job=~"^sourcegraph-executors.*"}[5m])) / (sum(increase(src_apiworker_command_total{op=~"exec.*",job=~"^sourcegraph-executors.*"}[5m])) + sum(increase(src_apiworker_command_errors_total{op=~"exec.*",job=~"^sourcegraph-executors.*"}[5m]))) * 100`
+Query: `sum(increase(src_apiworker_command_errors_total{op=~"exec.*",sg_job=~"^sourcegraph-executors.*"}[5m])) / (sum(increase(src_apiworker_command_total{op=~"exec.*",sg_job=~"^sourcegraph-executors.*"}[5m])) + sum(increase(src_apiworker_command_errors_total{op=~"exec.*",sg_job=~"^sourcegraph-executors.*"}[5m]))) * 100`
@@ -17995,7 +17995,7 @@ To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100510`
Technical details
-Query: `sum by (op)(increase(src_apiworker_command_total{op=~"exec.*",job=~"^sourcegraph-executors.*"}[5m]))`
+Query: `sum by (op)(increase(src_apiworker_command_total{op=~"exec.*",sg_job=~"^sourcegraph-executors.*"}[5m]))`
@@ -18014,7 +18014,7 @@ To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100511`
Technical details
-Query: `histogram_quantile(0.99, sum by (le,op)(rate(src_apiworker_command_duration_seconds_bucket{op=~"exec.*",job=~"^sourcegraph-executors.*"}[5m])))`
+Query: `histogram_quantile(0.99, sum by (le,op)(rate(src_apiworker_command_duration_seconds_bucket{op=~"exec.*",sg_job=~"^sourcegraph-executors.*"}[5m])))`
@@ -18033,7 +18033,7 @@ To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100512`
Technical details
-Query: `sum by (op)(increase(src_apiworker_command_errors_total{op=~"exec.*",job=~"^sourcegraph-executors.*"}[5m]))`
+Query: `sum by (op)(increase(src_apiworker_command_errors_total{op=~"exec.*",sg_job=~"^sourcegraph-executors.*"}[5m]))`
@@ -18052,7 +18052,7 @@ To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100513`
Technical details
-Query: `sum by (op)(increase(src_apiworker_command_errors_total{op=~"exec.*",job=~"^sourcegraph-executors.*"}[5m])) / (sum by (op)(increase(src_apiworker_command_total{op=~"exec.*",job=~"^sourcegraph-executors.*"}[5m])) + sum by (op)(increase(src_apiworker_command_errors_total{op=~"exec.*",job=~"^sourcegraph-executors.*"}[5m]))) * 100`
+Query: `sum by (op)(increase(src_apiworker_command_errors_total{op=~"exec.*",sg_job=~"^sourcegraph-executors.*"}[5m])) / (sum by (op)(increase(src_apiworker_command_total{op=~"exec.*",sg_job=~"^sourcegraph-executors.*"}[5m])) + sum by (op)(increase(src_apiworker_command_errors_total{op=~"exec.*",sg_job=~"^sourcegraph-executors.*"}[5m]))) * 100`
@@ -18073,7 +18073,7 @@ To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100600`
Technical details
-Query: `sum(increase(src_apiworker_command_total{op=~"teardown.*",job=~"^sourcegraph-executors.*"}[5m]))`
+Query: `sum(increase(src_apiworker_command_total{op=~"teardown.*",sg_job=~"^sourcegraph-executors.*"}[5m]))`
@@ -18092,7 +18092,7 @@ To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100601`
Technical details
-Query: `sum by (le)(rate(src_apiworker_command_duration_seconds_bucket{op=~"teardown.*",job=~"^sourcegraph-executors.*"}[5m]))`
+Query: `sum by (le)(rate(src_apiworker_command_duration_seconds_bucket{op=~"teardown.*",sg_job=~"^sourcegraph-executors.*"}[5m]))`
@@ -18111,7 +18111,7 @@ To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100602`
Technical details
-Query: `sum(increase(src_apiworker_command_errors_total{op=~"teardown.*",job=~"^sourcegraph-executors.*"}[5m]))`
+Query: `sum(increase(src_apiworker_command_errors_total{op=~"teardown.*",sg_job=~"^sourcegraph-executors.*"}[5m]))`
@@ -18130,7 +18130,7 @@ To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100603`
Technical details
-Query: `sum(increase(src_apiworker_command_errors_total{op=~"teardown.*",job=~"^sourcegraph-executors.*"}[5m])) / (sum(increase(src_apiworker_command_total{op=~"teardown.*",job=~"^sourcegraph-executors.*"}[5m])) + sum(increase(src_apiworker_command_errors_total{op=~"teardown.*",job=~"^sourcegraph-executors.*"}[5m]))) * 100`
+Query: `sum(increase(src_apiworker_command_errors_total{op=~"teardown.*",sg_job=~"^sourcegraph-executors.*"}[5m])) / (sum(increase(src_apiworker_command_total{op=~"teardown.*",sg_job=~"^sourcegraph-executors.*"}[5m])) + sum(increase(src_apiworker_command_errors_total{op=~"teardown.*",sg_job=~"^sourcegraph-executors.*"}[5m]))) * 100`
@@ -18149,7 +18149,7 @@ To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100610`
Technical details
-Query: `sum by (op)(increase(src_apiworker_command_total{op=~"teardown.*",job=~"^sourcegraph-executors.*"}[5m]))`
+Query: `sum by (op)(increase(src_apiworker_command_total{op=~"teardown.*",sg_job=~"^sourcegraph-executors.*"}[5m]))`
@@ -18168,7 +18168,7 @@ To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100611`
Technical details
-Query: `histogram_quantile(0.99, sum by (le,op)(rate(src_apiworker_command_duration_seconds_bucket{op=~"teardown.*",job=~"^sourcegraph-executors.*"}[5m])))`
+Query: `histogram_quantile(0.99, sum by (le,op)(rate(src_apiworker_command_duration_seconds_bucket{op=~"teardown.*",sg_job=~"^sourcegraph-executors.*"}[5m])))`
@@ -18187,7 +18187,7 @@ To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100612`
Technical details
-Query: `sum by (op)(increase(src_apiworker_command_errors_total{op=~"teardown.*",job=~"^sourcegraph-executors.*"}[5m]))`
+Query: `sum by (op)(increase(src_apiworker_command_errors_total{op=~"teardown.*",sg_job=~"^sourcegraph-executors.*"}[5m]))`
@@ -18206,7 +18206,7 @@ To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100613`
Technical details
-Query: `sum by (op)(increase(src_apiworker_command_errors_total{op=~"teardown.*",job=~"^sourcegraph-executors.*"}[5m])) / (sum by (op)(increase(src_apiworker_command_total{op=~"teardown.*",job=~"^sourcegraph-executors.*"}[5m])) + sum by (op)(increase(src_apiworker_command_errors_total{op=~"teardown.*",job=~"^sourcegraph-executors.*"}[5m]))) * 100`
+Query: `sum by (op)(increase(src_apiworker_command_errors_total{op=~"teardown.*",sg_job=~"^sourcegraph-executors.*"}[5m])) / (sum by (op)(increase(src_apiworker_command_total{op=~"teardown.*",sg_job=~"^sourcegraph-executors.*"}[5m])) + sum by (op)(increase(src_apiworker_command_errors_total{op=~"teardown.*",sg_job=~"^sourcegraph-executors.*"}[5m]))) * 100`
diff --git a/enterprise/cmd/executor/internal/metrics/metrics.go b/enterprise/cmd/executor/internal/metrics/metrics.go
index 7d78f3092f8f..a6240f86890d 100644
--- a/enterprise/cmd/executor/internal/metrics/metrics.go
+++ b/enterprise/cmd/executor/internal/metrics/metrics.go
@@ -43,20 +43,23 @@ func MakeExecutorMetricsGatherer(
// nodeExporterEndpoint is the URL of the local node_exporter endpoint, without
// the /metrics path. Disabled, when an empty string.
nodeExporterEndpoint string,
- // dockerRegsitryEndpoint is the URL of the intermediary caching docker registry,
+ // dockerRegistryEndpoint is the URL of the intermediary caching docker registry,
// for scraping and forwarding metrics. Disabled, when an empty string.
- dockerRegistryNodeExporterEndpoint string,
+ dockerRegistryEndpoint string,
) prometheus.GathererFunc {
nodeMetrics := newMetricsSyncPoint()
registryMetrics := newMetricsSyncPoint()
+ registryNodeMetrics := newMetricsSyncPoint()
- go backgroundCollectNodeExporterMetrics(nodeExporterEndpoint, nodeMetrics)
- go backgroundCollectNodeExporterMetrics(dockerRegistryNodeExporterEndpoint, registryMetrics)
+ go backgroundCollectMetrics(nodeExporterEndpoint+"/metrics", nodeMetrics)
+ go backgroundCollectMetrics(dockerRegistryEndpoint+"/proxy?module=registry", registryMetrics)
+ go backgroundCollectMetrics(dockerRegistryEndpoint+"/proxy?module=node", registryNodeMetrics)
return func() (mfs []*dto.MetricFamily, err error) {
// notify to start a scrape
nodeMetrics.notify.Signal()
registryMetrics.notify.Signal()
+ registryNodeMetrics.notify.Signal()
mfs, err = gatherer.Gather()
if err != nil {
@@ -64,12 +67,12 @@ func MakeExecutorMetricsGatherer(
}
if nodeExporterEndpoint != "" {
- result := <-registryMetrics.result
+ result := <-nodeMetrics.result
if result.err != nil {
logger.Warn("failed to get metrics for node exporter", log.Error(result.err))
}
for key, mf := range result.metrics {
- if strings.HasPrefix(key, "go_") || strings.HasPrefix(key, "promhttp_") || strings.HasPrefix(key, "process_") {
+ if filterMetric(key) {
continue
}
@@ -77,24 +80,46 @@ func MakeExecutorMetricsGatherer(
}
}
- if dockerRegistryNodeExporterEndpoint != "" {
- result := <-registryMetrics.result
- if result.err != nil {
- logger.Warn("failed to get metrics for docker registry", log.Error(result.err))
+ if dockerRegistryEndpoint != "" {
+ {
+ result := <-registryMetrics.result
+ if result.err != nil {
+ logger.Warn("failed to get metrics for docker registry", log.Error(result.err))
+ }
+ for key, mf := range result.metrics {
+ if filterMetric(key) {
+ continue
+ }
+
+ // should only be 1 registry, so we give it a set instance value
+ metricLabelInstance := "sg_instance"
+ instanceName := "docker-registry"
+ for _, m := range mf.Metric {
+ m.Label = append(m.Label, &dto.LabelPair{Name: &metricLabelInstance, Value: &instanceName})
+ }
+
+ mfs = append(mfs, mf)
+ }
}
- for key, mf := range result.metrics {
- if strings.HasPrefix(key, "go_") || strings.HasPrefix(key, "promhttp_") || strings.HasPrefix(key, "process_") {
- continue
+ {
+ result := <-registryNodeMetrics.result
+ if result.err != nil {
+ logger.Warn("failed to get metrics for docker registry", log.Error(result.err))
}
-
- // should only be 1 registry, so we give it a set instance value
- metricLabelInstance := "sg_instance"
- instanceName := "docker-regsitry"
- for _, m := range mf.Metric {
- m.Label = append(m.Label, &dto.LabelPair{Name: &metricLabelInstance, Value: &instanceName})
+ for key, mf := range result.metrics {
+ if filterMetric(key) {
+ continue
+ }
+
+ // should only be 1 registry, so we give it a set instance value
+ metricLabelInstance := "sg_instance"
+ instanceName := "docker-registry"
+ for _, m := range mf.Metric {
+ m.Label = append(m.Label, &dto.LabelPair{Name: &metricLabelInstance, Value: &instanceName})
+ }
+
+ mfs = append(mfs, mf)
}
-
- mfs = append(mfs, mf)
}
}
@@ -102,10 +127,14 @@ func MakeExecutorMetricsGatherer(
}
}
+func filterMetric(key string) bool {
+ return strings.HasPrefix(key, "go_") || strings.HasPrefix(key, "promhttp_") || strings.HasPrefix(key, "process_")
+}
+
// On notify, scrapes the specified endpoint for prometheus metrics and sends them down the
// associated channel. If the endpoint is "", then the channel is closed so that subsequent
// reads return an empty value instead of blocking indefinitely.
-func backgroundCollectNodeExporterMetrics(endpoint string, syncPoint metricsSyncPoint) {
+func backgroundCollectMetrics(endpoint string, syncPoint metricsSyncPoint) {
if endpoint == "" {
close(syncPoint.result)
return
@@ -114,7 +143,7 @@ func backgroundCollectNodeExporterMetrics(endpoint string, syncPoint metricsSync
collect := func() (map[string]*dto.MetricFamily, error) {
resp, err := (&http.Client{
Timeout: 2 * time.Second,
- }).Get(endpoint + "/metrics")
+ }).Get(endpoint)
if err != nil {
return nil, err
}
@@ -127,7 +156,7 @@ func backgroundCollectNodeExporterMetrics(endpoint string, syncPoint metricsSync
var parser expfmt.TextParser
mfMap, err := parser.TextToMetricFamilies(bytes.NewReader(b))
- return mfMap, errors.Wrapf(err, "parsing node_exporter metrics, response: %s", string(b))
+ return mfMap, errors.Wrapf(err, "parsing metrics, response: %s", string(b))
}
for {
diff --git a/monitoring/definitions/shared/codeintel.go b/monitoring/definitions/shared/codeintel.go
index efddf0f6e65d..c95d4871972f 100644
--- a/monitoring/definitions/shared/codeintel.go
+++ b/monitoring/definitions/shared/codeintel.go
@@ -287,6 +287,7 @@ func (codeIntelligence) NewExecutorProcessorGroup(containerName string) monitori
constructorOptions := ObservableConstructorOptions{
MetricNameRoot: "executor",
+ JobLabel: "sg_job",
MetricDescriptionRoot: "handler",
Filters: filters,
}
@@ -323,7 +324,7 @@ func (codeIntelligence) NewExecutorProcessorGroup(containerName string) monitori
// src_executor_run_lock_held_total
func (codeIntelligence) NewExecutorExecutionRunLockContentionGroup(containerName string) monitoring.Group {
constructor := func(metricNameRoot, legend string) Observable {
- filters := makeFilters("sg_jobs", containerName)
+ filters := makeFilters("sg_job", containerName)
return Observable{
Name: metricNameRoot + "_total",
Description: fmt.Sprintf("milliseconds %s every 5m", legend),
@@ -361,6 +362,7 @@ func (codeIntelligence) NewExecutorSetupCommandGroup(containerName string) monit
ObservableConstructorOptions: ObservableConstructorOptions{
MetricNameRoot: "apiworker_command",
+ JobLabel: "sg_job",
MetricDescriptionRoot: "command",
Filters: []string{`op=~"setup.*"`},
By: []string{"op"},
@@ -394,6 +396,7 @@ func (codeIntelligence) NewExecutorExecutionCommandGroup(containerName string) m
ObservableConstructorOptions: ObservableConstructorOptions{
MetricNameRoot: "apiworker_command",
+ JobLabel: "sg_job",
MetricDescriptionRoot: "command",
Filters: []string{`op=~"exec.*"`},
By: []string{"op"},
@@ -427,6 +430,7 @@ func (codeIntelligence) NewExecutorTeardownCommandGroup(containerName string) mo
ObservableConstructorOptions: ObservableConstructorOptions{
MetricNameRoot: "apiworker_command",
+ JobLabel: "sg_job",
MetricDescriptionRoot: "command",
Filters: []string{`op=~"teardown.*"`},
By: []string{"op"},
@@ -460,6 +464,7 @@ func (codeIntelligence) NewExecutorAPIClientGroup(containerName string) monitori
ObservableConstructorOptions: ObservableConstructorOptions{
MetricNameRoot: "apiworker_apiclient",
+ JobLabel: "sg_job",
MetricDescriptionRoot: "client",
Filters: nil,
By: []string{"op"},