Skip to content

Commit 119fbf4

Browse files
authored
Merge pull request #8974 from tstromberg/retry-cgroups
entrypoint: Retry fix_cgroup on failure
2 parents 84dd707 + 1f4bfa1 commit 119fbf4

File tree

1 file changed

+31
-23
lines changed

1 file changed

+31
-23
lines changed

deploy/kicbase/entrypoint

+31 -23
Original file line number | Diff line number | Diff line change
@@ -65,7 +65,7 @@ fix_mount() {
6565
mount --make-rshared /
6666
}
6767

68-
fix_cgroup() {
68+
fix_cgroup_mounts() {
6969
echo 'INFO: fix cgroup mounts for all subsystems'
7070
# For each cgroup subsystem, Docker does a bind mount from the current
7171
# cgroup to the root of the cgroup subsystem. For instance:
@@ -78,30 +78,38 @@ fix_cgroup() {
7878
# This is because `/proc/<pid>/cgroup` is not affected by the bind mount.
7979
# The following is a workaround to recreate the original cgroup
8080
# environment by doing another bind mount for each subsystem.
81-
local docker_cgroup_mounts
82-
docker_cgroup_mounts=$(grep /sys/fs/cgroup /proc/self/mountinfo | grep docker || true)
83-
if [[ -n "${docker_cgroup_mounts}" ]]; then
84-
local docker_cgroup cgroup_subsystems subsystem
85-
docker_cgroup=$(echo "${docker_cgroup_mounts}" | head -n 1 | cut -d' ' -f 4)
86-
cgroup_subsystems=$(echo "${docker_cgroup_mounts}" | cut -d' ' -f 5)
87-
echo "${cgroup_subsystems}" |
88-
while IFS= read -r subsystem; do
89-
mkdir -p "${subsystem}${docker_cgroup}"
90-
mount --bind "${subsystem}" "${subsystem}${docker_cgroup}"
81+
local cgroup_mounts
82+
83+
# NOTE: This extracts fields 4 and on
84+
# See https://man7.org/linux/man-pages/man5/proc.5.html for field names
85+
cgroup_mounts=$(egrep -o '(/docker|libpod_parent).*/sys/fs/cgroup.*' /proc/self/mountinfo || true)
86+
87+
if [[ -n "${cgroup_mounts}" ]]; then
88+
local mount_root
89+
mount_root=$(echo "${cgroup_mounts}" | head -n 1 | cut -d' ' -f1)
90+
91+
for mount_point in $(echo "${cgroup_mounts}" | cut -d' ' -f 2); do
92+
# bind mount each mount_point to mount_point + mount_root
93+
# mount --bind /sys/fs/cgroup/cpu /sys/fs/cgroup/cpu/docker/fb07bb6daf7730a3cb14fc7ff3e345d1e47423756ce54409e66e01911bab2160
94+
local target="${mount_point}${mount_root}"
95+
96+
if ! findmnt "${target}"; then
97+
mkdir -p "${target}"
98+
mount --bind "${mount_point}" "${target}"
99+
fi
91100
done
92101
fi
93-
local podman_cgroup_mounts
94-
podman_cgroup_mounts=$(grep /sys/fs/cgroup /proc/self/mountinfo | grep libpod_parent || true)
95-
if [[ -n "${podman_cgroup_mounts}" ]]; then
96-
local podman_cgroup cgroup_subsystems subsystem
97-
podman_cgroup=$(echo "${podman_cgroup_mounts}" | head -n 1 | cut -d' ' -f 4)
98-
cgroup_subsystems=$(echo "${podman_cgroup_mounts}" | cut -d' ' -f 5)
99-
echo "${cgroup_subsystems}" |
100-
while IFS= read -r subsystem; do
101-
mkdir -p "${subsystem}${podman_cgroup}"
102-
mount --bind "${subsystem}" "${subsystem}${podman_cgroup}"
102+
}
103+
104+
retryable_fix_cgroup_mounts() {
105+
for i in $(seq 0 10); do
106+
fix_cgroup_mounts && return || echo "fix_cgroup failed with exit code $? (retry $i)"
107+
echo "fix_cgroup diagnostics information below:"
108+
mount
109+
sleep 1
103110
done
104-
fi
111+
112+
exit 31
105113
}
106114

107115
fix_machine_id() {
@@ -256,7 +264,7 @@ enable_network_magic(){
256264
select_iptables
257265
fix_kmsg
258266
fix_mount
259-
fix_cgroup
267+
retryable_fix_cgroup_mounts
260268
fix_machine_id
261269
fix_product_name
262270
fix_product_uuid

0 commit comments

Comments
 (0)