Skip to content

Commit a409a7e

Browse files
committed
fix: cpu utilization information may be bogus; switch to cgroup-based stats
This also adds a CPU Throttling metric.
1 parent 3d5ca5c commit a409a7e

File tree

2 files changed

+32
-14
lines changed

2 files changed

+32
-14
lines changed
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,32 @@
1-
$NF !~ /timestamp/ && $3 !="swpd" {
2-
printf("\x1b[1;33m[CPU Utilization %4.1f%%] \x1b[0;33m%-30s \x1b[2m%14s %2s %2s %2s %2s %2s %sT%sZ\x1b[0m\n",
3-
$13+$14,
4-
pod,
5-
$4,
6-
$13,
7-
$14,
8-
$15,
9-
$16,
10-
$17,
11-
$(NF-1),
12-
$NF);
13-
fflush();
1+
$1=="usage_usec" {to=$2}  # cumulative CPU time consumed, in microseconds
2+
$1=="user_usec" {us=$2}  # cumulative user-mode share of usage_usec
3+
$1=="system_usec" {sy=$2}  # cumulative kernel-mode share of usage_usec
4+
$1=="throttled_usec" {th=$2}  # cumulative time spent throttled, in microseconds
5+
$1=="nr_periods" {np=$2}  # number of enforcement periods elapsed so far
6+
$1=="nr_throttled" {nt=$2}  # number of those periods in which the group was throttled
7+
$1=="nanos" {nanos=$2}  # wall-clock sample time, in nanoseconds since the epoch
8+
$1=="timestamp" {now=$0; sub(/^timestamp[ \t]+/, "", now)}  # keep the whole date string, not just its first word
9+
$1=="------" {
10+
if (length(prev) > 0 && nanos - prev > 0) {  # need a previous sample and a positive interval
11+
delta = (nanos - prev) / 1000;  # elapsed wall time between samples, in microseconds
12+
# cpu.stat counters are cumulative: utilization is the usage growth since the last sample over the elapsed time.
13+
printf("\x1b[1;33m[CPU Utilization %4.1f%%] \x1b[0;33m%-30s \x1b[2m%s %s %s %s %s\x1b[0m\n",
14+
100 * (to - prevto) / delta,
15+
pod,
16+
us,
17+
sy,
18+
to,
19+
th,
20+
now);
21+
# Guard nr_periods == 0 (or missing): dividing by it would abort awk; report 0% throttling instead.
22+
printf("\x1b[1;31m[CPU Throttling %4.1f%%] \x1b[0;31m%-30s \x1b[2m%s %s %s\x1b[0m\n",
23+
(np > 0 ? 100 * nt / np : 0),
24+
pod,
25+
nt,
26+
np,
27+
now);
28+
fflush();
29+
}
30+
# Remember this sample's clock and usage so the next separator line can compute deltas.
31+
prev = nanos; prevto = to;
1432
}

guidebooks/ml/codeflare/custodian/containers/cpu/cpu.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,4 @@
33
kubectl get pod -l ${KUBE_PODFULL_LABEL_SELECTOR} -o name \
44
--field-selector=status.phase==Running \
55
| xargs -P512 -I {} -n1 \
6-
sh -c 'kubectl exec {} -- sh -c "TZ=UTC vmstat --timestamp 5" | awk -v pod={} -f /tmp/cpu.awk' 2> /dev/null
6+
sh -c 'kubectl exec {} -- sh -c "while true; do echo \"timestamp \\\$(date -u)\"; echo \"nanos \\\$(date +%s%N)\"; cat /sys/fs/cgroup/cpu.stat; echo ------; sleep 1; done" | awk -v pod={} -f /tmp/cpu.awk'

0 commit comments

Comments
 (0)