Skip to content

Commit 3287329

Browse files
Limit some high cardinality metrics by default
1 parent db29c1c commit 3287329

File tree

4 files changed

+143
-257
lines changed

4 files changed

+143
-257
lines changed

examples/prometheus/prometheus.yaml

+39 -21
Original file line number | Diff line number | Diff line change
@@ -28,6 +28,7 @@ parameters:
2828
name: SESSION_SECRET
2929
generate: expression
3030
from: "[a-zA-Z0-9]{43}"
31+
3132
objects:
3233
# Authorize the prometheus service account to read data about the cluster
3334
- apiVersion: v1
@@ -255,18 +256,20 @@ objects:
255256
miqTarget: "ContainerNode"
256257
severity: "HIGH"
257258
message: "{{$labels.instance}} is down"
259+
258260
recording.rules: |
259261
groups:
260262
- name: aggregate_container_resources
261263
rules:
262264
- record: container_cpu_usage_rate
263-
expr: sum without (cpu) (rate(container_cpu_usage_seconds_total[3m]))
265+
expr: sum without (cpu) (rate(container_cpu_usage_seconds_total[5m]))
264266
- record: container_memory_rss_by_type
265267
expr: container_memory_rss{id=~"/|/system.slice|/kubepods.slice"} > 0
266268
- record: container_cpu_usage_percent_by_host
267-
expr: sum by (hostname,type)(rate(container_cpu_usage_seconds_total{id="/"}[3m])) / on (hostname,type) machine_cpu_cores
268-
- record: apiserver_request_count_by_resources
269+
expr: sum by (hostname,type)(rate(container_cpu_usage_seconds_total{id="/"}[5m])) / on (hostname,type) machine_cpu_cores
270+
- record: apiserver_request_count_rate_by_resources
269271
expr: sum without (client,instance,contentType) (rate(apiserver_request_count[5m]))
272+
270273
prometheus.yml: |
271274
rule_files:
272275
- '*.rules'
@@ -304,24 +307,6 @@ objects:
304307
action: keep
305308
regex: default;kubernetes;https
306309
307-
# Scrape config for nodes.
308-
#
309-
# Each node exposes a /metrics endpoint that contains operational metrics for
310-
# the Kubelet and other components.
311-
- job_name: 'kubernetes-nodes'
312-
313-
scheme: https
314-
tls_config:
315-
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
316-
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
317-
318-
kubernetes_sd_configs:
319-
- role: node
320-
321-
relabel_configs:
322-
- action: labelmap
323-
regex: __meta_kubernetes_node_label_(.+)
324-
325310
# Scrape config for controllers.
326311
#
327312
# Each master node exposes a /metrics endpoint on :8444 that contains operational metrics for
@@ -352,6 +337,31 @@ objects:
352337
regex: (.+)(?::\d+)
353338
replacement: $1:8444
354339
340+
# Scrape config for nodes.
341+
#
342+
# Each node exposes a /metrics endpoint that contains operational metrics for
343+
# the Kubelet and other components.
344+
- job_name: 'kubernetes-nodes'
345+
346+
scheme: https
347+
tls_config:
348+
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
349+
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
350+
351+
kubernetes_sd_configs:
352+
- role: node
353+
354+
# Drop a very high cardinality metric that is incorrect in 3.7. It will be
355+
# fixed in 3.9.
356+
metric_relabel_configs:
357+
- source_labels: [__name__]
358+
action: drop
359+
regex: 'openshift_sdn_pod_(setup|teardown)_latency(.*)'
360+
361+
relabel_configs:
362+
- action: labelmap
363+
regex: __meta_kubernetes_node_label_(.+)
364+
355365
# Scrape config for cAdvisor.
356366
#
357367
# Beginning in Kube 1.7, each node exposes a /metrics/cadvisor endpoint that
@@ -368,6 +378,14 @@ objects:
368378
kubernetes_sd_configs:
369379
- role: node
370380
381+
# Exclude a set of high cardinality metrics that can contribute to significant
382+
# memory use in large clusters. These can be selectively enabled as necessary
383+
# for medium or small clusters.
384+
metric_relabel_configs:
385+
- source_labels: [__name__]
386+
action: drop
387+
regex: 'container_(cpu_user_seconds_total|cpu_cfs_periods_total|memory_usage_bytes|memory_swap|memory_working_set_bytes|memory_cache|last_seen|fs_(read_seconds_total|write_seconds_total|sector_(.*)|io_(.*)|reads_merged_total|writes_merged_total)|tasks_state|memory_failcnt|memory_failures_total|spec_memory_swap_limit_bytes|fs_(.*)_bytes_total|spec_(.*))'
388+
371389
relabel_configs:
372390
- action: labelmap
373391
regex: __meta_kubernetes_node_label_(.+)

hack/update-generated-bindata.sh

+4
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,8 @@ pushd "${OS_ROOT}" > /dev/null
2323
-ignore ".*\.go$" \
2424
-ignore "\.DS_Store" \
2525
-ignore application-template.json \
26+
-ignore "prometheus-standalone.yaml" \
27+
-ignore "node-exporter.yaml" \
2628
examples/image-streams/... \
2729
examples/db-templates/... \
2830
examples/jenkins \
@@ -44,6 +46,8 @@ pushd "${OS_ROOT}" > /dev/null
4446
-ignore "OWNERS" \
4547
-ignore "\.DS_Store" \
4648
-ignore ".*\.(go|md)$" \
49+
-ignore "prometheus-standalone.yaml" \
50+
-ignore "node-exporter.yaml" \
4751
test/extended/testdata/... \
4852
test/integration/testdata \
4953
examples/db-templates \

pkg/oc/bootstrap/bindata.go

+50 -118
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)