Skip to content

Commit ee48a99

Browse files
Refactor container cleanup to improve logging
In the past, the set of logs coming from containers was not well labeled and we were losing information about the containers when we harvested their logs, which could have led to name collisions. By naming the container log files the same as k8s names the containers, we have a better guarantee of uniqueness. Signed-off-by: Steve Kuznetsov <[email protected]>
1 parent 03e7153 commit ee48a99

File tree

3 files changed

+91
-38
lines changed

3 files changed

+91
-38
lines changed

hack/lib/cleanup.sh

+86-1
Original file line numberDiff line numberDiff line change
@@ -14,4 +14,89 @@
1414
function os::cleanup::dump_etcd() {
  # One destination path, shared by the log message and the redirect below.
  local etcd_dump="${ARTIFACT_DIR}/etcd_dump.json"

  os::log::info "Dumping etcd contents to ${etcd_dump}"

  # Request everything under /v2/keys recursively and save the response.
  os::util::curl_etcd "/v2/keys/?recursive=true" >"${etcd_dump}"
}
17+
}
18+
19+
# os::cleanup::containers operates on k8s containers to stop the containers
# and optionally remove the containers and any volumes they had attached.
# Removal is skipped when SKIP_IMAGE_CLEANUP is set to a non-empty value.
# When no `docker` binary can be found a warning is logged and nothing else
# is done.
#
# Globals:
#  - SKIP_IMAGE_CLEANUP
# Arguments:
#  None
# Returns:
#  None
function os::cleanup::containers() {
  if ! os::util::find::system_binary docker >/dev/null 2>&1; then
    os::log::warning "No \`docker\` binary found, skipping container cleanup."
    return
  fi

  # Keep the loop variable out of the caller's scope.
  local id

  os::log::info "Stopping k8s docker containers"
  for id in $( os::cleanup::internal::list_k8s_containers ); do
    os::log::debug "Stopping ${id}"
    docker stop "${id}" >/dev/null
  done

  if [[ -n "${SKIP_IMAGE_CLEANUP:-}" ]]; then
    return
  fi

  os::log::info "Removing k8s docker containers"
  for id in $( os::cleanup::internal::list_k8s_containers ); do
    os::log::debug "Removing ${id}"
    # Actually remove the container; --volumes also removes the volumes
    # associated with it, as promised in the header above.
    docker rm --volumes "${id}" >/dev/null
  done
}
50+
readonly -f os::cleanup::containers
51+
52+
# os::cleanup::dump_container_logs operates on k8s containers to dump any logs
# from the containers. The combined stdout and stderr of `docker logs` for each
# container is written to ${LOG_DIR}/containers/<name>.log, where <name> is the
# docker container name without its leading slash (the container ID is used if
# no name could be determined). When no `docker` binary can be found a warning
# is logged and nothing else is done.
#
# Globals:
#  - LOG_DIR
# Arguments:
#  None
# Returns:
#  None
function os::cleanup::dump_container_logs() {
  if ! os::util::find::system_binary docker >/dev/null 2>&1; then
    os::log::warning "No \`docker\` binary found, skipping container log dump."
    return
  fi

  local container_log_dir="${LOG_DIR}/containers"
  mkdir -p "${container_log_dir}"

  # Keep loop state out of the caller's scope.
  local id name

  os::log::info "Dumping container logs to ${container_log_dir}"
  for id in $( os::cleanup::internal::list_k8s_containers ); do
    name="$( docker inspect --format '{{ .Name }}' "${id}" )"
    # docker reports names with a leading slash; drop it so the file name
    # and the debug message are the bare container name.
    name="${name#/}"
    # Fall back to the ID so a failed inspect cannot produce a bare ".log".
    name="${name:-${id}}"
    os::log::debug "Dumping logs for ${id} to ${name}.log"
    docker logs "${id}" >"${container_log_dir}/${name}.log" 2>&1
  done
}
77+
readonly -f os::cleanup::dump_container_logs
78+
79+
80+
81+
# os::cleanup::internal::list_k8s_containers prints a list of the full IDs of
# docker containers that belonged to k8s, i.e. containers whose docker name
# begins with "/k8s_". The IDs are written to stdout on a single line, joined
# by the first character of IFS (a space by default); an empty line is
# printed when no container matches.
#
# Globals:
#  None
# Arguments:
#  None
# Returns:
#  None
function os::cleanup::internal::list_k8s_containers() {
  # Initialise as a real (empty) array and keep all loop state local.
  local ids=()
  local short_id id name

  for short_id in $( docker ps -aq ); do
    id="$( docker inspect --format '{{ .Id }}' "${short_id}" )"
    name="$( docker inspect --format '{{ .Name }}' "${id}" )"
    if [[ "${name}" =~ ^/k8s_ ]]; then
      ids+=( "${id}" )
    fi
  done

  # The `:-` default keeps this expansion safe under `set -u` when the array
  # is empty (older bash versions treat an empty array as unbound).
  echo "${ids[*]:-}"
}
102+
readonly -f os::cleanup::internal::list_k8s_containers

hack/test-end-to-end-docker.sh

+3-6
Original file line numberDiff line numberDiff line change
@@ -24,11 +24,11 @@ function cleanup()
2424
echo
2525

2626
set +e
27-
dump_container_logs
27+
os::cleanup::dump_container_logs
2828

2929
# pull information out of the server log so that we can get failure management in jenkins to highlight it and
3030
# really have it smack people in their logs. This is a severe correctness problem
31-
grep -a5 "CACHE.*ALTERED" ${LOG_DIR}/container-origin.log
31+
grep -ra5 "CACHE.*ALTERED" ${LOG_DIR}/containers
3232

3333
os::cleanup::dump_etcd
3434

@@ -37,10 +37,7 @@ function cleanup()
3737
docker stop origin
3838
docker rm origin
3939

40-
os::log::info "Stopping k8s docker containers"; docker ps | awk 'index($NF,"k8s_")==1 { print $1 }' | xargs -l -r docker stop
41-
if [[ -z "${SKIP_IMAGE_CLEANUP-}" ]]; then
42-
os::log::info "Removing k8s docker containers"; docker ps -a | awk 'index($NF,"k8s_")==1 { print $1 }' | xargs -l -r docker rm
43-
fi
40+
os::cleanup::containers
4441
set -u
4542
fi
4643

hack/util.sh

+2-31
Original file line numberDiff line numberDiff line change
@@ -15,28 +15,6 @@ function kill_all_processes() {
1515
}
1616
readonly -f kill_all_processes
1717

18-
# dump_container_logs writes container logs to $LOG_DIR
19-
function dump_container_logs() {
20-
if ! docker version >/dev/null 2>&1; then
21-
return
22-
fi
23-
24-
mkdir -p ${LOG_DIR}
25-
26-
os::log::info "Dumping container logs to ${LOG_DIR}"
27-
for container in $(docker ps -aq); do
28-
container_name=$(docker inspect -f "{{.Name}}" $container)
29-
# strip off leading /
30-
container_name=${container_name:1}
31-
if [[ "$container_name" =~ ^k8s_ ]]; then
32-
pod_name=$(echo $container_name | awk 'BEGIN { FS="[_.]+" }; { print $4 }')
33-
container_name=${pod_name}-$(echo $container_name | awk 'BEGIN { FS="[_.]+" }; { print $2 }')
34-
fi
35-
docker logs "$container" >&"${LOG_DIR}/container-${container_name}.log"
36-
done
37-
}
38-
readonly -f dump_container_logs
39-
4018
# delete_empty_logs deletes empty logs
4119
function delete_empty_logs() {
4220
# Clean up zero byte log files
@@ -71,25 +49,18 @@ function cleanup_openshift() {
7149
ETCD_PORT="${ETCD_PORT:-4001}"
7250

7351
set +e
74-
dump_container_logs
75-
7652
# pull information out of the server log so that we can get failure management in jenkins to highlight it and
7753
# really have it smack people in their logs. This is a severe correctness problem
7854
grep -a5 "CACHE.*ALTERED" ${LOG_DIR}/openshift.log
7955

8056
os::cleanup::dump_etcd
57+
os::cleanup::dump_container_logs
8158

8259
if [[ -z "${SKIP_TEARDOWN-}" ]]; then
8360
os::log::info "Tearing down test"
8461
kill_all_processes
8562

86-
if docker version >/dev/null 2>&1; then
87-
os::log::info "Stopping k8s docker containers"; docker ps | awk 'index($NF,"k8s_")==1 { print $1 }' | xargs -l -r docker stop -t 1 >/dev/null
88-
if [[ -z "${SKIP_IMAGE_CLEANUP-}" ]]; then
89-
os::log::info "Removing k8s docker containers"; docker ps -a | awk 'index($NF,"k8s_")==1 { print $1 }' | xargs -l -r docker rm -v >/dev/null
90-
fi
91-
fi
92-
63+
os::cleanup::containers
9364
os::log::info "Pruning etcd data directory..."
9465
local sudo="${USE_SUDO:+sudo}"
9566
${sudo} rm -rf "${ETCD_DATA_DIR}"

0 commit comments

Comments
 (0)