19
19
case $opt in
20
20
a) FORCE_ALL=true; continue ;;
21
21
f) FORCE=$OPTARG ; continue ;;
22
- s) export GUIDEBOOK_STORE=$OPTARG ; echo " Using store=$GUIDEBOOK_STORE " ; continue ;;
22
+ s) export GUIDEBOOK_STORE=$OPTARG ; echo " [Test] Using store=$GUIDEBOOK_STORE " ; continue ;;
23
23
b) GUIDEBOOK=$OPTARG ; continue ;;
24
24
i) NO_KIND=true; continue ;;
25
25
* ) continue ;;
@@ -29,13 +29,20 @@ shift $((OPTIND-1))
29
29
30
30
# Unless -i was given (which sets NO_KIND), run setup.sh and use whatever
# it prints on stdout as the kubeconfig path for the rest of this script.
if [ -z "${NO_KIND:-}" ]; then
    # Assign first, export second: `export VAR=$(cmd)` would hide the
    # exit status of setup.sh behind that of `export`.
    KUBECONFIG=$("$SCRIPTDIR"/setup.sh)
    export KUBECONFIG
    echo "[Test] Using KUBECONFIG=$KUBECONFIG"
fi

# We get this for free from GitHub Actions. Add it generally. This
# informs the guidebooks to adjust their resource demands.
export CI=true
38
38
39
+ # If you find that dangling processes (mostly kubectl) linger, this
40
+ # may help with debugging; you may also set it to "*" to get
41
+ # everything. Word of warning: kubectl seems to respond to DEBUG being
42
+ # set to *anything*, so expect to get a bunch of low-level Go logs, no
43
+ # matter what you set this to.
44
+ # export DEBUG=madwizard/cleanup
45
+
39
46
# build docker image of log aggregator just for this test and load it
40
47
# into kind
41
48
function build {
@@ -63,12 +70,12 @@ function run {
63
70
64
71
local PRE=" $MWPROFILES_PATH_BASE " /../profiles.d/$profile /pre
65
72
if [ -f " $PRE " ]; then
66
- echo " Running pre guidebooks for profile=$profile "
73
+ echo " [Test] Running pre guidebooks for profile=$profile "
67
74
cat " $PRE " | xargs -n1 " $ROOT " /bin/codeflare -p $profile $yes
68
75
fi
69
76
70
- echo " Running with variant=$variant profile=$profile yes=$yes "
71
- " $ROOT " /bin/codeflare -V -p $profile $yes $guidebook
77
+ echo " [Test] Running with variant=$variant profile=$profile yes=$yes "
78
+ GUIDEBOOK_NAME= " main-job-run " " $ROOT " /bin/codeflare -V -p $profile $yes $guidebook
72
79
}
73
80
74
81
# Undeploy any prior log aggregator
@@ -78,8 +85,8 @@ function cleanup {
78
85
local profile=$( basename $profileFull )
79
86
export MWPROFILES_PATH=" $MWPROFILES_PATH_BASE " /$variant
80
87
81
- echo " Undeploying any prior log aggregator"
82
- (" $ROOT " /bin/codeflare -p $profile -y ml/ray/aggregator/in-cluster/client-side/undeploy \
88
+ echo " [Test] Undeploying any prior log aggregator"
89
+ (GUIDEBOOK_NAME= " log-aggregator-undeploy " " $ROOT " /bin/codeflare -p $profile -y ml/ray/aggregator/in-cluster/client-side/undeploy \
83
90
|| exit 0)
84
91
}
85
92
@@ -96,8 +103,10 @@ function attach {
96
103
97
104
local jobId=$2
98
105
99
- echo " Attaching variant=$variant profile=$profile jobId=$jobId "
100
- " $ROOT " /bin/codeflare -V -p $profile attach -a $jobId
106
+ echo " [Test] Attaching variant=$variant profile=$profile jobId=$jobId "
107
+ GUIDEBOOK_NAME=" log-aggregator-attach" " $ROOT " /bin/codeflare -V -p $profile attach -a $jobId --wait &
108
+ ATTACH_PID=$!
109
+ echo " [Test] Attach underway"
101
110
}
102
111
103
112
# @return path to locally captured logs for the given jobId, run in the given profile
@@ -121,23 +130,23 @@ function validateAttach {
121
130
RUNDIR=$( localpath $profile $jobId )
122
131
123
132
if [ ! -d " $RUNDIR " ]; then
124
- echo " ❌ Logs were not captured locally: missing logdir"
133
+ echo " [Test] ❌ Logs were not captured locally: missing logdir"
125
134
exit 1
126
135
elif [ ! -f " $RUNDIR /jobid.txt" ]; then
127
- echo " ❌ Logs were not captured locally: missing jobid.txt"
136
+ echo " [Test] ❌ Logs were not captured locally: missing jobid.txt"
128
137
exit 1
129
138
elif [ ! -f " $RUNDIR /logs/job.txt" ]; then
130
- echo " ❌ Logs were not captured locally: missing logs/job.txt"
139
+ echo " [Test] ❌ Logs were not captured locally: missing logs/job.txt"
131
140
exit 1
132
141
elif [ ! -s " $RUNDIR /logs/job.txt" ]; then
133
- echo " ❌ Logs were not captured locally: empty logs/job.txt"
142
+ echo " [Test] ❌ Logs were not captured locally: empty logs/job.txt"
134
143
exit 1
135
144
fi
136
145
137
146
# TODO the expected output is going to be profile-specific
138
147
grep -q ' Final result' " $RUNDIR /logs/job.txt" \
139
- && echo " ✅ Logs seem good!" \
140
- || (echo " ❌ Logs were not captured locally: job logs incomplete" && exit 1)
148
+ && echo " [Test] ✅ Logs seem good!" \
149
+ || (echo " [Test] ❌ Logs were not captured locally: job logs incomplete" && exit 1)
141
150
}
142
151
143
152
function logpoller {
@@ -152,18 +161,24 @@ function logpoller {
152
161
# clean up after ourselves before we exit
153
162
#
154
163
#
# Clean up after ourselves before we exit: terminate the children of
# every background helper whose PID was recorded, then terminate all
# remaining direct children of this script.
#
# Globals (read): ATTACH_PID, HEAD_POLLER_PID, WORKER_POLLER_PID,
#                 EVENTS_PID, AGGREGATOR_POLLER_PID (each may be unset)
# Returns: always 0; cleanup is best-effort.
#
function onexit {
    local pid

    if [ -n "${ATTACH_PID:-}" ]; then
        echo "!!!!!!!!!KILL ATTACH $ATTACH_PID"
    fi

    # ${VAR:-} keeps this safe under `set -u` when a helper was never
    # started and its PID variable was therefore never assigned.
    for pid in \
        "${ATTACH_PID:-}" \
        "${HEAD_POLLER_PID:-}" \
        "${WORKER_POLLER_PID:-}" \
        "${EVENTS_PID:-}" \
        "${AGGREGATOR_POLLER_PID:-}"; do
        if [ -n "$pid" ]; then
            # pkill exits non-zero when nothing matched; that is fine here
            pkill -P "$pid" || true
        fi
    done

    # Finally, sweep up any remaining direct children of this script.
    pkill -P $$ || true
}
168
183
169
184
#
@@ -199,7 +214,7 @@ function test {
199
214
# allocate JOB_ID (requires node and `uuid` npm; but we should
200
215
# have both for codeflare-cli dev)
201
216
export JOB_ID=$( node -e ' console.log(require("uuid").v4())' )
202
- echo " Using JOB_ID=$JOB_ID "
217
+ echo " [Test] Using JOB_ID=$JOB_ID "
203
218
204
219
# 1. launch codeflare guidebook run
205
220
run " $1 " | tee $OUTPUT &
@@ -216,11 +231,11 @@ function test {
216
231
# done
217
232
sleep 10
218
233
219
- echo " About to attach"
220
234
attach " $1 " " $JOB_ID "
221
235
fi
222
236
223
237
wait $RUN_PID
238
+ echo " [Test] Run has finished"
224
239
# the job should be done now
225
240
226
241
# 3. if asked, now validate the log aggregator
0 commit comments