Skip to content

Commit 918765f

Browse files
committed
update: graceful shutdown for recording sidecar container in K8s
Signed-off-by: Viet Nguyen Duc <[email protected]>
1 parent 75fb697 commit 918765f

21 files changed

+229
-65
lines changed

Diff for: .github/workflows/docker-test.yml

+2-2
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ jobs:
135135
uses: nick-invision/retry@master
136136
if: matrix.build-all != true
137137
with:
138-
timeout_minutes: 20
138+
timeout_minutes: 30
139139
max_attempts: 3
140140
retry_wait_seconds: 60
141141
command: |
@@ -156,7 +156,7 @@ jobs:
156156
- name: Run Docker Compose to ${{ matrix.test-strategy }}
157157
uses: nick-invision/retry@master
158158
with:
159-
timeout_minutes: 20
159+
timeout_minutes: 40
160160
max_attempts: 2
161161
retry_wait_seconds: 60
162162
command: |

Diff for: Base/check-grid.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -27,4 +27,4 @@ do
2727
esac
2828
done
2929

30-
curl -sSL http://${HOST}:${PORT}/wd/hub/status | jq -r '.value.ready' | grep -q "true" || exit 1
30+
curl -skSL ${SE_SERVER_PROTOCOL:-"http"}://${HOST}:${PORT}/wd/hub/status | jq -r '.value.ready' | grep -q "true" || exit 1

Diff for: Base/entry_point.sh

+5
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,11 @@ function shutdown {
2222
echo "Waiting for Selenium Node to shutdown gracefully..."
2323
bash ${NODE_CONFIG_DIRECTORY}/nodePreStop.sh
2424
fi
25+
if [ -n "${SE_VIDEO_CONTAINER_NAME}" ]; then
26+
# For K8s, when video sidecar container and shareProcessNamespace are enabled in pod spec
27+
echo "Shutting down ${SE_VIDEO_CONTAINER_NAME} container..."
28+
pkill -f "${SE_VIDEO_CONTAINER_NAME}"
29+
fi
2530
kill -s SIGTERM ${SUPERVISOR_PID}
2631
wait ${SUPERVISOR_PID}
2732
echo "Shutdown complete"

Diff for: NodeBase/selenium.conf

+3
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ priority=0
77
command=/opt/bin/start-xvfb.sh
88
autostart=true
99
autorestart=true
10+
killasgroup=true
1011

1112
;Logs
1213
redirect_stderr=false
@@ -24,6 +25,7 @@ priority=5
2425
command=/opt/bin/start-vnc.sh
2526
autostart=true
2627
autorestart=true
28+
killasgroup=true
2729

2830
;Logs
2931
redirect_stderr=false
@@ -41,6 +43,7 @@ priority=10
4143
command=/opt/bin/start-novnc.sh
4244
autostart=true
4345
autorestart=true
46+
killasgroup=true
4447

4548
;Logs
4649
redirect_stderr=false

Diff for: Video/entry_point.sh

-2
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,6 @@ SUPERVISOR_PID=$!
1717
function shutdown {
1818
echo "Trapped SIGTERM/SIGINT/x so shutting down supervisord..."
1919
kill -s SIGTERM ${SUPERVISOR_PID}
20-
wait `pgrep -f ffmpeg | tr '\n' ' '`
21-
wait `pgrep -f rclone | tr '\n' ' '`
2220
wait ${SUPERVISOR_PID}
2321
echo "Shutdown complete"
2422
}

Diff for: Video/supervisord.conf

-3
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@ minprocs=200 ; (min. avail process descriptors;
1414
[program:video-recording]
1515
priority=0
1616
command=/opt/bin/video.sh
17-
stopasgroup = true
1817
killasgroup=true
1918
autostart=true
2019
startsecs=0
@@ -28,7 +27,6 @@ stdout_logfile_maxbytes=0
2827
[program:video-ready]
2928
priority=5
3029
command=python3 /opt/bin/video_ready.py
31-
stopasgroup = true
3230
killasgroup=true
3331
autostart=true
3432
autorestart=true
@@ -41,7 +39,6 @@ stdout_logfile_maxbytes=0
4139
[program:video-upload]
4240
priority=10
4341
command=/opt/bin/upload.sh
44-
stopasgroup = true
4542
killasgroup=true
4643
autostart=%(ENV_SE_VIDEO_INTERNAL_UPLOAD)s
4744
autorestart=%(ENV_SE_VIDEO_INTERNAL_UPLOAD)s

Diff for: Video/upload.sh

+21-20
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@ UPLOAD_OPTS=${SE_UPLOAD_OPTS:-"-P --cutoff-mode SOFT --metadata"}
88
UPLOAD_RETAIN_LOCAL_FILE=${SE_UPLOAD_RETAIN_LOCAL_FILE:-"false"}
99
UPLOAD_PIPE_FILE_NAME=${SE_UPLOAD_PIPE_FILE_NAME:-"uploadpipe"}
1010
SE_VIDEO_INTERNAL_UPLOAD=${SE_VIDEO_INTERNAL_UPLOAD:-"false"}
11-
VIDEO_UPLOAD_ENABLED=${SE_VIDEO_UPLOAD_ENABLED:-"false"}
1211
VIDEO_UPLOAD_BATCH_CHECK=${SE_VIDEO_UPLOAD_BATCH_CHECK:-"10"}
1312
process_name="video.uploader"
1413

@@ -44,24 +43,6 @@ function rename_rclone_env() {
4443
done
4544
}
4645

47-
function consume_pipe_file() {
48-
while read FILE DESTINATION < ${UPLOAD_PIPE_FILE};
49-
do
50-
if [ "${FILE}" = "exit" ];
51-
then
52-
FORCE_EXIT=true
53-
exit
54-
elif [ "$FILE" != "" ] && [ "$DESTINATION" != "" ];
55-
then
56-
rclone_upload "${FILE}" "${DESTINATION}"
57-
elif [ -f ${FORCE_EXIT_FILE} ];
58-
then
59-
echo "$(date +%FT%T%Z) [${process_name}] - Force exit signal detected"
60-
exit
61-
fi
62-
done
63-
}
64-
6546
list_rclone_pid=()
6647
function check_and_clear_background() {
6748
# Wait for a batch rclone processes to finish
@@ -79,7 +60,27 @@ function rclone_upload() {
7960
local source=$1
8061
local target=$2
8162
echo "$(date +%FT%T%Z) [${process_name}] - Uploading ${source} to ${target}"
82-
exec rclone --config ${UPLOAD_CONFIG_DIRECTORY}/${UPLOAD_CONFIG_FILE_NAME} ${UPLOAD_COMMAND} ${UPLOAD_OPTS} "${source}" "${target}" &
63+
rclone --config ${UPLOAD_CONFIG_DIRECTORY}/${UPLOAD_CONFIG_FILE_NAME} ${UPLOAD_COMMAND} ${UPLOAD_OPTS} "${source}" "${target}" &
64+
list_rclone_pid+=($!)
65+
check_and_clear_background
66+
}
67+
68+
#######################################
# Read upload requests from ${UPLOAD_PIPE_FILE} and dispatch them until an
# exit condition is met.
# Each line is split into "<file> <destination>" (the destination receives
# the remainder of the line).
# Globals:
#   UPLOAD_PIPE_FILE (read)   - path the requests are read from
#   FORCE_EXIT_FILE  (read)   - marker file; its presence stops the loop
#   process_name     (read)   - tag used in log lines
#   FILE, DESTINATION (written) - fields of the most recent line
#   FORCE_EXIT       (written) - set to "true" when the "exit" sentinel is read
# Calls:   rclone_upload <file> <destination>
# Returns: does not return normally on a stop condition; it calls `exit`.
#          Falls out of the loop only when `read` itself fails.
#######################################
function consume_pipe_file() {
  # The redirect sits on `read`, so the path is re-opened for every line.
  # -r keeps backslashes in paths literal instead of treating them as escapes.
  while read -r FILE DESTINATION < "${UPLOAD_PIPE_FILE}"; do
    if [[ "${FILE}" == "exit" ]]; then
      # Explicit stop sentinel written by the producer.
      FORCE_EXIT=true
      exit
    elif [[ -n "${FILE}" && -n "${DESTINATION}" ]]; then
      rclone_upload "${FILE}" "${DESTINATION}"
    # Quoted on purpose: an unquoted, empty FORCE_EXIT_FILE would collapse to
    # `[ -f ]`, which is always true and would trigger a bogus force exit.
    elif [[ -f "${FORCE_EXIT_FILE}" ]]; then
      echo "$(date +%FT%T%Z) [${process_name}] - Force exit signal detected"
      exit
    fi
  done
}
8485

8586
function graceful_exit() {

Diff for: Video/video.sh

+37-27
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ VIDEO_FOLDER=${VIDEO_FOLDER}
1111
VIDEO_UPLOAD_ENABLED=${VIDEO_UPLOAD_ENABLED:-$SE_VIDEO_UPLOAD_ENABLED}
1212
VIDEO_CONFIG_DIRECTORY=${VIDEO_CONFIG_DIRECTORY:-"/opt/bin"}
1313
UPLOAD_DESTINATION_PREFIX=${UPLOAD_DESTINATION_PREFIX:-$SE_UPLOAD_DESTINATION_PREFIX}
14-
UPLOAD_PIPE_FILE_NAME=${UPLOAD_PIPE_FILE_NAME:-"uploadpipe"}
14+
UPLOAD_PIPE_FILE_NAME=${SE_UPLOAD_PIPE_FILE_NAME:-"uploadpipe"}
1515
SE_VIDEO_INTERNAL_UPLOAD=${SE_VIDEO_INTERNAL_UPLOAD:-"false"}
1616
SE_SERVER_PROTOCOL=${SE_SERVER_PROTOCOL:-"http"}
1717
max_attempts=${SE_VIDEO_WAIT_ATTEMPTS:-50}
@@ -57,13 +57,34 @@ function create_pipe() {
5757
fi
5858
}
5959

60+
#######################################
# Point DISPLAY at the target X server, poll it until it answers, then record
# the screen dimensions.
# Globals:
#   DISPLAY_CONTAINER_NAME, DISPLAY_NUM (read) - build the DISPLAY value
#   max_attempts  (read)    - number of retries after the first failed probe
#   process_name  (read)    - tag used in log lines
#   DISPLAY       (written, exported)
#   attempts      (written) - retries performed; left global as in the original
#   VIDEO_SIZE    (written) - second field of xdpyinfo's "dimensions:" line
# Returns: exits the script when the display never becomes reachable.
#######################################
function wait_for_display() {
  export DISPLAY=${DISPLAY_CONTAINER_NAME}:${DISPLAY_NUM}.0
  # Track success explicitly. Inferring failure from the counter alone would
  # misreport a probe that succeeds on the very last permitted attempt.
  local display_ready="false"
  attempts=0

  echo "$(date +%FT%T%Z) [${process_name}] - Checking if the display is open"
  while true; do
    if xset b off; then
      display_ready="true"
      break
    fi
    # Give up once the retry budget is used.
    if [[ $attempts = "$max_attempts" ]]; then
      break
    fi
    echo "$(date +%FT%T%Z) [${process_name}] - Waiting before next display check"
    sleep 0.5
    attempts=$((attempts+1))
  done
  if [[ "${display_ready}" != "true" ]]; then
    echo "$(date +%FT%T%Z) [${process_name}] - Can not open display, exiting."
    # NOTE(review): a bare `exit` reuses the status of the echo above (0);
    # kept unchanged because the process supervisor may depend on it.
    exit
  fi

  VIDEO_SIZE=$(xdpyinfo | grep 'dimensions:' | awk '{print $2}')
}
79+
6080
function wait_util_uploader_shutdown() {
6181
max_wait=5
6282
wait=0
6383
if [[ "${VIDEO_UPLOAD_ENABLED}" != "false" ]] && [[ -n "${UPLOAD_DESTINATION_PREFIX}" ]];
6484
then
6585
while [[ -f ${FORCE_EXIT_FILE} ]] && [[ ${wait} -lt ${max_wait} ]];
6686
do
87+
echo "exit" >> ${UPLOAD_PIPE_FILE} &
6788
echo "$(date +%FT%T%Z) [${process_name}] - Waiting for force exit file to be consumed by external upload container"
6889
sleep 1
6990
wait=$((wait+1))
@@ -73,6 +94,7 @@ function wait_util_uploader_shutdown() {
7394
then
7495
while [[ $(pgrep rclone | wc -l) -gt 0 ]]
7596
do
97+
echo "exit" >> ${UPLOAD_PIPE_FILE} &
7698
echo "$(date +%FT%T%Z) [${process_name}] - Recorder is waiting for RCLONE to finish"
7799
sleep 1
78100
done
@@ -134,6 +156,12 @@ function check_if_recording_inprogress() {
134156
fi
135157
}
136158

159+
#######################################
# Pretty-print the saved JSON file for the current session, if it exists.
# Globals:   session_id (read) - selects /tmp/graphQL_<session_id>.json
# Outputs:   the file passed through `jq '.'` on stdout; nothing when absent
# Returns:   0 when the file is missing, otherwise jq's exit status
#######################################
function log_node_response() {
  local response_file="/tmp/graphQL_$session_id.json"
  # Nothing saved for this session: stay silent.
  [[ -f "${response_file}" ]] || return 0
  jq '.' "${response_file}"
}
164+
137165
function graceful_exit() {
138166
check_if_recording_inprogress
139167
send_exit_signal_to_uploader
@@ -160,27 +188,9 @@ if [[ "${VIDEO_UPLOAD_ENABLED}" != "true" ]] && [[ "${VIDEO_FILE_NAME}" != "auto
160188
-video_size ${VIDEO_SIZE} -r ${FRAME_RATE} -i ${DISPLAY_CONTAINER_NAME}:${DISPLAY_NUM}.0 -codec:v ${CODEC} ${PRESET} -pix_fmt yuv420p "$VIDEO_FOLDER/$VIDEO_FILE_NAME"
161189

162190
else
163-
create_pipe
164191
trap graceful_exit SIGTERM SIGINT EXIT
165-
export DISPLAY=${DISPLAY_CONTAINER_NAME}:${DISPLAY_NUM}.0
166-
167-
attempts=0
168-
169-
echo "$(date +%FT%T%Z) [${process_name}] - Checking if the display is open"
170-
until xset b off || [[ $attempts = "$max_attempts" ]]
171-
do
172-
echo "$(date +%FT%T%Z) [${process_name}] - Waiting before next display check"
173-
sleep 0.5
174-
attempts=$((attempts+1))
175-
done
176-
if [[ $attempts = "$max_attempts" ]];
177-
then
178-
echo "$(date +%FT%T%Z) [${process_name}] - Can not open display, exiting."
179-
exit
180-
fi
181-
182-
VIDEO_SIZE=$(xdpyinfo | grep 'dimensions:' | awk '{print $2}')
183-
192+
create_pipe
193+
wait_for_display
184194
recording_started="false"
185195
video_file_name=""
186196
video_file=""
@@ -201,7 +211,7 @@ else
201211
done
202212
if [[ $attempts = "$max_attempts" ]];
203213
then
204-
echo "$(date +%FT%T%Z) [${process_name}] - Can not reach node API, exiting."
214+
echo "$(date +%FT%T%Z) [${process_name}] - Can not reach node API, reach the max attempts $max_attempts, exiting."
205215
exit
206216
fi
207217
while curl --noproxy "*" -sk --request GET ${NODE_STATUS_ENDPOINT} > /tmp/status.json
@@ -214,15 +224,13 @@ else
214224
caps_se_video_record=${return_list[0]}
215225
video_file_name="${return_list[1]}.mp4"
216226
echo "$(date +%FT%T%Z) [${process_name}] - Start recording: $caps_se_video_record, video file name: $video_file_name"
217-
if [[ -f "/tmp/graphQL_$session_id.json" ]]; then
218-
jq '.' "/tmp/graphQL_$session_id.json";
219-
fi
227+
log_node_response
220228
fi
221229
if [[ "$session_id" != "null" && "$session_id" != "" && "$session_id" != "reserved" && "$recording_started" = "false" && "$caps_se_video_record" = "true" ]];
222230
then
223231
video_file="${VIDEO_FOLDER}/$video_file_name"
224232
echo "$(date +%FT%T%Z) [${process_name}] - Starting to record video"
225-
exec ffmpeg -hide_banner -loglevel warning -flags low_delay -threads 2 -fflags nobuffer+genpts -strict experimental -y -f x11grab \
233+
ffmpeg -hide_banner -loglevel warning -flags low_delay -threads 2 -fflags nobuffer+genpts -strict experimental -y -f x11grab \
226234
-video_size ${VIDEO_SIZE} -r ${FRAME_RATE} -i ${DISPLAY} -codec:v ${CODEC} ${PRESET} -pix_fmt yuv420p "$video_file" &
227235
recording_started="true"
228236
echo "$(date +%FT%T%Z) [${process_name}] - Video recording started"
@@ -244,6 +252,8 @@ else
244252
fi
245253
prev_session_id=$session_id
246254
done
247-
echo "$(date +%FT%T%Z) [${process_name}] - Node API is not responding, exiting."
255+
echo "$(date +%FT%T%Z) [${process_name}] - Last response from node API..."
256+
log_node_response
257+
echo "$(date +%FT%T%Z) [${process_name}] - Node API is not responding now, exiting..."
248258
exit
249259
fi

Diff for: charts/selenium-grid/templates/_helpers.tpl

+5-2
Original file line numberDiff line numberDiff line change
@@ -271,6 +271,7 @@ template:
271271
{{- toYaml . | nindent 6 }}
272272
{{- end }}
273273
spec:
274+
shareProcessNamespace: {{ $.Values.global.seleniumGrid.stdoutProbeLog | ternary "false" .node.shareProcessNamespace }}
274275
serviceAccountName: {{ template "seleniumGrid.serviceAccount.fullname" . }}
275276
serviceAccount: {{ template "seleniumGrid.serviceAccount.fullname" . }}
276277
restartPolicy: {{ template "seleniumGrid.node.restartPolicy" . }}
@@ -422,7 +423,7 @@ template:
422423
{{- toYaml .node.sidecars | nindent 6 }}
423424
{{- end }}
424425
{{- if $.Values.videoRecorder.enabled }}
425-
- name: video
426+
- name: {{ $.Values.videoRecorder.name }}
426427
{{- $imageTag := default $.Values.global.seleniumGrid.videoImageTag $.Values.videoRecorder.imageTag }}
427428
{{- $imageRegistry := default $.Values.global.seleniumGrid.imageRegistry $.Values.videoRecorder.imageRegistry }}
428429
image: {{ printf "%s/%s:%s" $imageRegistry $.Values.videoRecorder.imageName $imageTag }}
@@ -482,7 +483,7 @@ template:
482483
lifecycle: {{- toYaml . | nindent 10 }}
483484
{{- end }}
484485
{{- if and $.Values.videoRecorder.uploader.enabled (not (empty $.Values.videoRecorder.uploader.name)) }}
485-
- name: uploader
486+
- name: {{ default "uploader" $.Values.videoRecorder.uploader.name }}
486487
{{- $imageTag := .uploader.imageTag }}
487488
{{- $imageRegistry := .uploader.imageRegistry }}
488489
image: {{ printf "%s/%s:%s" $imageRegistry .uploader.imageName $imageTag }}
@@ -725,6 +726,8 @@ Define terminationGracePeriodSeconds of the node pod.
725726
{{- $period := $nodePeriod -}}
726727
{{- if and (eq .Values.autoscaling.scalingType "deployment") (eq (include "seleniumGrid.useKEDA" $) "true") -}}
727728
{{- $period = ternary $nodePeriod $autoscalingPeriod (gt $nodePeriod $autoscalingPeriod) -}}
729+
{{- else if and (eq .Values.autoscaling.scalingType "job") (eq (include "seleniumGrid.useKEDA" $) "true") }}
730+
{{- $period = 30 -}}
728731
{{- end -}}
729732
{{- $period -}}
730733
{{- end -}}

Diff for: charts/selenium-grid/templates/node-configmap.yaml

+3
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,9 @@ data:
2626
SE_DRAIN_AFTER_SESSION_COUNT: '{{- and (eq (include "seleniumGrid.useKEDA" .) "true") (eq .Values.autoscaling.scalingType "job") | ternary "1" "0" -}}'
2727
SE_NODE_GRID_URL: '{{ include "seleniumGrid.url" $ }}'
2828
SE_NODE_GRID_GRAPHQL_URL: '{{ include "seleniumGrid.graphqlURL" $ }}'
29+
{{- if $.Values.videoRecorder.enabled }}
30+
SE_VIDEO_CONTAINER_NAME: {{ $.Values.videoRecorder.name | quote }}
31+
{{- end }}
2932
{{- if $.Values.nodeConfigMap.leftoversCleanup.enabled }}
3033
SE_ENABLE_BROWSER_LEFTOVERS_CLEANUP: 'true'
3134
{{- with $.Values.nodeConfigMap.leftoversCleanup.jobIntervalInSecs }}

Diff for: charts/selenium-grid/values.yaml

+6-1
Original file line numberDiff line numberDiff line change
@@ -745,6 +745,7 @@ chromeNode:
745745
annotations: {}
746746
# Labels for chrome-node pods
747747
labels: {}
748+
shareProcessNamespace: true
748749
# Resources for chrome-node container
749750
resources:
750751
requests:
@@ -909,6 +910,7 @@ firefoxNode:
909910
tolerations: []
910911
# Node selector for firefox-node pods
911912
nodeSelector: {}
913+
shareProcessNamespace: true
912914
# Resources for firefox-node container
913915
resources:
914916
requests:
@@ -1068,6 +1070,7 @@ edgeNode:
10681070
tolerations: []
10691071
# Node selector for edge-node pods
10701072
nodeSelector: {}
1073+
shareProcessNamespace: true
10711074
# Resources for edge-node container
10721075
resources:
10731076
requests:
@@ -1191,6 +1194,8 @@ edgeNode:
11911194

11921195
videoRecorder:
11931196
enabled: false
1197+
# Name of the video recorder container in the node pod spec
1198+
name: video
11941199
# imageRegistry: selenium
11951200
# Image of video recorder
11961201
imageName: video
@@ -1204,7 +1209,7 @@ videoRecorder:
12041209
enabled: false
12051210
# Where to upload the video file e.g. remoteName://bucketName/path. Refer to destination syntax of rclone https://rclone.org/docs/
12061211
destinationPrefix:
1207-
# What uploader to use. See .videRecorder.rclone for how to create a new one.
1212+
# What uploader to use. See .videoRecorder.s3 for how to create a new one.
12081213
name:
12091214
configFileName: upload.conf
12101215
entryPointFileName: upload.sh

Diff for: tests/charts/ci/JobAutoscaling-values.yaml

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
autoscaling:
22
scalingType: job
33
scaledJobOptions:
4-
successfulJobsHistoryLimit: 0
5-
failedJobsHistoryLimit: 0
4+
successfulJobsHistoryLimit: 100
5+
failedJobsHistoryLimit: 100
66
scalingStrategy:
77
strategy: default
88
scaledOptions:

Diff for: tests/charts/ci/base-auth-ingress-values.yaml

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
global:
22
seleniumGrid:
33
logLevel: INFO
4-
stdoutProbeLog: true
4+
stdoutProbeLog: false
55

66
serverConfigMap:
77
env:
8-
SE_SUPERVISORD_LOG_LEVEL: "error"
8+
SE_SUPERVISORD_LOG_LEVEL: "info"
99

1010
ingress:
1111
enabled: true

0 commit comments

Comments
 (0)