@@ -28,6 +28,7 @@ parameters:
28
28
name : SESSION_SECRET
29
29
generate : expression
30
30
from : " [a-zA-Z0-9]{43}"
31
+
31
32
objects :
32
33
# Authorize the prometheus service account to read data about the cluster
33
34
- apiVersion : v1
@@ -255,18 +256,20 @@ objects:
255
256
miqTarget: "ContainerNode"
256
257
severity: "HIGH"
257
258
message: "{{$labels.instance}} is down"
259
+
258
260
recording.rules : |
259
261
groups:
260
262
- name: aggregate_container_resources
261
263
rules:
262
264
- record: container_cpu_usage_rate
263
- expr: sum without (cpu) (rate(container_cpu_usage_seconds_total[3m ]))
265
+ expr: sum without (cpu) (rate(container_cpu_usage_seconds_total[5m ]))
264
266
- record: container_memory_rss_by_type
265
267
expr: container_memory_rss{id=~"/|/system.slice|/kubepods.slice"} > 0
266
268
- record: container_cpu_usage_percent_by_host
267
- expr: sum by (hostname,type)(rate(container_cpu_usage_seconds_total{id="/"}[3m ])) / on (hostname,type) machine_cpu_cores
268
- - record: apiserver_request_count_by_resources
269
+ expr: sum by (hostname,type)(rate(container_cpu_usage_seconds_total{id="/"}[5m ])) / on (hostname,type) machine_cpu_cores
270
+ - record: apiserver_request_count_rate_by_resources
269
271
expr: sum without (client,instance,contentType) (rate(apiserver_request_count[5m]))
272
+
270
273
prometheus.yml : |
271
274
rule_files:
272
275
- '*.rules'
@@ -304,24 +307,6 @@ objects:
304
307
action: keep
305
308
regex: default;kubernetes;https
306
309
307
- # Scrape config for nodes.
308
- #
309
- # Each node exposes a /metrics endpoint that contains operational metrics for
310
- # the Kubelet and other components.
311
- - job_name: 'kubernetes-nodes'
312
-
313
- scheme: https
314
- tls_config:
315
- ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
316
- bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
317
-
318
- kubernetes_sd_configs:
319
- - role: node
320
-
321
- relabel_configs:
322
- - action: labelmap
323
- regex: __meta_kubernetes_node_label_(.+)
324
-
325
310
# Scrape config for controllers.
326
311
#
327
312
# Each master node exposes a /metrics endpoint on :8444 that contains operational metrics for
@@ -352,6 +337,31 @@ objects:
352
337
regex: (.+)(?::\d+)
353
338
replacement: $1:8444
354
339
340
+ # Scrape config for nodes.
341
+ #
342
+ # Each node exposes a /metrics endpoint that contains operational metrics for
343
+ # the Kubelet and other components.
344
+ - job_name: 'kubernetes-nodes'
345
+
346
+ scheme: https
347
+ tls_config:
348
+ ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
349
+ bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
350
+
351
+ kubernetes_sd_configs:
352
+ - role: node
353
+
354
+ # Drop a very high cardinality metric that is incorrect in 3.7. It will be
355
+ # fixed in 3.9.
356
+ metric_relabel_configs:
357
+ - source_labels: [__name__]
358
+ action: drop
359
+ regex: 'openshift_sdn_pod_(setup|teardown)_latency(.*)'
360
+
361
+ relabel_configs:
362
+ - action: labelmap
363
+ regex: __meta_kubernetes_node_label_(.+)
364
+
355
365
# Scrape config for cAdvisor.
356
366
#
357
367
# Beginning in Kube 1.7, each node exposes a /metrics/cadvisor endpoint that
@@ -368,6 +378,14 @@ objects:
368
378
kubernetes_sd_configs:
369
379
- role: node
370
380
381
+ # Exclude a set of high cardinality metrics that can contribute to significant
382
+ # memory use in large clusters. These can be selectively enabled as necessary
383
+ # for medium or small clusters.
384
+ metric_relabel_configs:
385
+ - source_labels: [__name__]
386
+ action: drop
387
+ regex: 'container_(cpu_user_seconds_total|cpu_cfs_periods_total|memory_usage_bytes|memory_swap|memory_working_set_bytes|memory_cache|last_seen|fs_(read_seconds_total|write_seconds_total|sector_(.*)|io_(.*)|reads_merged_total|writes_merged_total)|tasks_state|memory_failcnt|memory_failures_total|spec_memory_swap_limit_bytes|fs_(.*)_bytes_total|spec_(.*))'
388
+
371
389
relabel_configs:
372
390
- action: labelmap
373
391
regex: __meta_kubernetes_node_label_(.+)
0 commit comments