forked from aws/aws-node-termination-handler
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrun-test
executable file
·310 lines (273 loc) · 10.9 KB
/
run-test
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
#!/bin/bash
set -euo pipefail
START=$(date +%s)
START_FOR_QUERYING=$(date -u +"%Y-%m-%dT%TZ")
export START_FOR_QUERYING
SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )"
CLUSTER_NAME_BASE="nth-test"
PRESERVE=false
TMP_DIR=""
DOCKER_ARGS=""
PROVISION_CLUSTER_ARGS=""
DELETE_CLUSTER_ARGS=""
DEFAULT_NODE_TERMINATION_HANDLER_DOCKER_IMG="node-termination-handler:customtest"
NODE_TERMINATION_HANDLER_DOCKER_IMG=""
DEFAULT_WEBHOOK_DOCKER_IMG="webhook-test-proxy:customtest"
WEBHOOK_DOCKER_IMG=""
OVERRIDE_PATH=0
K8S_VERSION="1.29"
AEMM_URL="amazon-ec2-metadata-mock-service.default.svc.cluster.local"
AEMM_VERSION="1.8.1"
AEMM_DL_URL="https://github.com/aws/amazon-ec2-metadata-mock/releases/download/v$AEMM_VERSION/amazon-ec2-metadata-mock-$AEMM_VERSION.tgz"
WEBHOOK_URL=${WEBHOOK_URL:="http://webhook-test-proxy.default.svc.cluster.local"}
ASSERTION_SCRIPTS=$(find "$SCRIPTPATH/../e2e" -type f | sort)
SCRIPT_DENYLIST=(
"$SCRIPTPATH/../e2e/asg-launch-lifecycle-sqs-test"
)
function timeout() { perl -e 'alarm shift; exec @ARGV' "$@"; }
function relpath() {
perl -e 'use File::Spec; print File::Spec->abs2rel(@ARGV) . "\n"' "$1" "$2"
}
function retry {
local retries=$1
shift
local count=0
set +e
trap "set -e" RETURN
until "$@"; do
exit=$?
set -e
wait=$((2 ** count))
count=$((count + 1))
if [ $count -lt "$retries" ]; then
echo "Retry $count/$retries exited $exit, retrying in $wait seconds..."
sleep $wait
else
echo "Retry $count/$retries exited $exit, no more retries left."
return $exit
fi
set +e
done
return 0
}
export -f retry
function clean_up {
if [[ "$PRESERVE" == false ]]; then
"$SCRIPTPATH/../k8s-local-cluster-test/delete-cluster" "$DELETE_CLUSTER_ARGS" || :
return
fi
echo "To resume test with the same cluster use: \"-c $TMP_DIR\""""
}
function exit_and_fail {
local pod_id
pod_id=$(get_nth_worker_pod || :)
kubectl logs "$pod_id" --namespace kube-system || :
END=$(date +%s)
echo "⏰ Took $((END - START))sec"
echo "❌ NTH Integration Test FAILED $CLUSTER_NAME! ❌"
exit 1
}
function reset_cluster {
echo "Resetting cluster"
charts=$(helm ls --all --short)
if [[ -n "$charts" ]]; then
helm del $charts --no-hooks || :
fi
system_charts=$(helm ls --all --short --namespace kube-system)
if [[ -n "$system_charts" ]]; then
helm del $system_charts --no-hooks --namespace kube-system || :
fi
for node in $(kubectl get nodes | tail -n+2 | cut -d' ' -f1); do
kubectl uncordon "$node"
kubectl taint node "$node" aws-node-termination-handler/scheduled-maintenance- || true
kubectl taint node "$node" aws-node-termination-handler/spot-itn- || true
kubectl taint node "$node" aws-node-termination-handler/asg-lifecycle-termination- || true
kubectl taint node "$node" aws-node-termination-handler/rebalance-recommendation- || true
done
remove_labels || :
kubectl delete events --all
sleep 2
}
function remove_labels {
echo "Removing labels from NTH cluster nodes"
labels_to_remove=()
while IFS='' read -r line; do
labels_to_remove+=("$line");
done < <(kubectl get nodes -o json | jq '.items[].metadata.labels' | grep 'aws-node-termination-handler' | tr -d '[:blank:]' | tr -d '\"' | cut -d':' -f1)
if [[ "${#labels_to_remove[@]}" -ne 0 ]]; then
for l in "${labels_to_remove[@]}"; do
for n in $(kubectl get nodes -o json | jq -r '.items[].metadata.name'); do
echo "Deleting label $l on node $n"
kubectl label node "$n" "$l"-
done
done
fi
}
function get_nth_worker_pod {
kubectl get pods -n kube-system \
--selector 'app.kubernetes.io/name=aws-node-termination-handler' \
--field-selector="status.phase=Running" \
--sort-by=.metadata.creationTimestamp \
-o go-template --template '{{range .items}}{{.metadata.name}} {{.metadata.creationTimestamp}}{{"\n"}}{{end}}' \
| awk '$2 >= "'"${START_FOR_QUERYING//+0000/Z}"'" { print $1 }' | head -1
}
USAGE=$(cat << 'EOM'
Usage: run-test [-p] [-d] [-o] [-a <ASSERTION_SCRIPT] [-b <TEST_BASE_NAME] [-c <CLUSTER_CONTEXT_DIR] [-n <NTH_DOCKER_IMG>] [-v K8S_VERSION] [-w <WEBHOOK_DOCKER_IMG>]
Executes a test within a provisioned kubernetes cluster with NTH and IMDS pre-loaded.
Example: run-test -p -n node-termination-handler:customtest
Optional:
-a Assertion script (default is ALL tests in the e2e dir)
-b Base name of test (will be used for cluster too)
-c Cluster context directory, if operating on an existing cluster
-p Preserve kind k8s cluster for inspection
-n Node Termination Handler Docker Image
-d use GOPROXY=direct to bypass proxy.golang.org
-o Override path w/ your own kubectl and kind binaries
-v Kubernetes Version (Default: 1.29) [1.23, 1.24, 1.25, 1.26, 1.27, 1.28, and 1.29]
-w Webhook Docker Image
EOM
)
# Process our input arguments
while getopts "pdn:oc:a:b:v:w:" opt; do
case ${opt} in
p ) # PRESERVE K8s Cluster
echo "❄️ This run will preserve the cluster as you requested"
PRESERVE=true
;;
n ) # Node Termination Handler Docker Image
NODE_TERMINATION_HANDLER_DOCKER_IMG="$OPTARG"
;;
d ) # use GOPROXY=direct
DOCKER_ARGS="--build-arg GOPROXY=direct"
;;
o ) # Override path with your own kubectl and kind binaries
DELETE_CLUSTER_ARGS="$DELETE_CLUSTER_ARGS -o"
PROVISION_CLUSTER_ARGS="$PROVISION_CLUSTER_ARGS -o"
OVERRIDE_PATH=1
;;
c ) # Cluster context directory to operate on existing cluster
TMP_DIR=$OPTARG
;;
a ) # Assertion script
ASSERTION_SCRIPTS=$(echo "$OPTARG" | tr "," "\n")
;;
b ) # Base cluster name
CLUSTER_NAME_BASE=$OPTARG
;;
v ) # K8s VERSION
K8S_VERSION=$OPTARG
;;
w ) # Webhook Docker Image
WEBHOOK_DOCKER_IMG="$OPTARG"
;;
\? )
echo "$USAGE" 1>&2
exit
;;
esac
done
if [[ -z $TMP_DIR ]]; then
TMP_DIR=$("$SCRIPTPATH/../k8s-local-cluster-test/provision-cluster" -b "$CLUSTER_NAME_BASE" -v "$K8S_VERSION $PROVISION_CLUSTER_ARGS")
fi
if [[ $OVERRIDE_PATH -eq 0 ]]; then
export PATH=$TMP_DIR:$PATH
else
export PATH=$PATH:$TMP_DIR
fi
CLUSTER_NAME=$(cat "$TMP_DIR/clustername")
## Build and Load Docker Images
if [ -z "$NODE_TERMINATION_HANDLER_DOCKER_IMG" ]; then
echo "🥑 Building the node-termination-handler docker image"
docker buildx build --load $DOCKER_ARGS -t "$DEFAULT_NODE_TERMINATION_HANDLER_DOCKER_IMG" "$SCRIPTPATH/../../."
NODE_TERMINATION_HANDLER_DOCKER_IMG="$DEFAULT_NODE_TERMINATION_HANDLER_DOCKER_IMG"
echo "👍 Built the node-termination-handler docker image"
else
echo "🥑 Skipping building the node-termination-handler docker image, since one was specified ($NODE_TERMINATION_HANDLER_DOCKER_IMG)"
fi
echo "$NODE_TERMINATION_HANDLER_DOCKER_IMG" > "$TMP_DIR/nth-docker-img"
NODE_TERMINATION_HANDLER_DOCKER_REPO=$(echo "$NODE_TERMINATION_HANDLER_DOCKER_IMG" | cut -d':' -f1)
NODE_TERMINATION_HANDLER_DOCKER_TAG=$(echo "$NODE_TERMINATION_HANDLER_DOCKER_IMG" | cut -d':' -f2)
if [ -z "$WEBHOOK_DOCKER_IMG" ]; then
echo "🥑 Building the webhook-test-proxy docker image"
docker buildx build --load $DOCKER_ARGS -t "$DEFAULT_WEBHOOK_DOCKER_IMG" "$SCRIPTPATH/../webhook-test-proxy/."
WEBHOOK_DOCKER_IMG="$DEFAULT_WEBHOOK_DOCKER_IMG"
echo "👍 Built the webhook-test-proxy docker image"
else
echo "🥑 Skipping building the webhook-test-proxy docker image, since one was specified ($WEBHOOK_DOCKER_IMG)"
fi
echo "$WEBHOOK_DOCKER_IMG" > "$TMP_DIR/webhook-test-proxy-docker-img"
WEBHOOK_DOCKER_REPO=$(echo "$WEBHOOK_DOCKER_IMG" | cut -d':' -f1)
WEBHOOK_DOCKER_TAG=$(echo "$WEBHOOK_DOCKER_IMG" | cut -d':' -f2)
echo "🥑 Loading both images into the cluster"
kind load docker-image --name "$CLUSTER_NAME" "$NODE_TERMINATION_HANDLER_DOCKER_IMG"
kind load docker-image --name "$CLUSTER_NAME" "$WEBHOOK_DOCKER_IMG"
echo "👍 Loaded both images into the cluster"
export KUBECONFIG="$TMP_DIR/kubeconfig"
trap "exit_and_fail" INT TERM ERR
trap "clean_up" EXIT
cat << EOF >$TMP_DIR/env
export KUBECONFIG=$TMP_DIR/kubeconfig
echo "Updated KUBECONFIG=$KUBECONFIG"
export PATH=$TMP_DIR:\$PATH
echo "Updated PATH=$PATH"
EOF
echo "======================================================================================================"
echo "To poke around your test manually:"
echo ". $TMP_DIR/env"
echo "kubectl get pods -A"
echo "======================================================================================================"
### exported vars and funcs that tests can use
export TMP_DIR
export CLUSTER_NAME
export AEMM_URL
export AEMM_VERSION
export AEMM_DL_URL
export WEBHOOK_URL
export -f timeout
export -f relpath
export -f get_nth_worker_pod
export NODE_TERMINATION_HANDLER_DOCKER_IMG
export NODE_TERMINATION_HANDLER_DOCKER_REPO
export NODE_TERMINATION_HANDLER_DOCKER_TAG
export WEBHOOK_DOCKER_IMG
export WEBHOOK_DOCKER_REPO
export WEBHOOK_DOCKER_TAG
export AWS_REGION="us-east-1"
export WORKER_IP="192.168.0.1"
export NTH_CONTROL_LABEL="kubernetes\.io/hostname=$CLUSTER_NAME-control-plane"
export NTH_WORKER_LABEL="kubernetes\.io/hostname=ip-${WORKER_IP//\./-}.ec2.internal"
###
## Need to override hostname label for CTH localstack tests
kubectl label node "${CLUSTER_NAME}-worker" "$(echo $NTH_WORKER_LABEL | tr -d '\\')" --overwrite
## Mark worker2 only for Critical Add-Ons like dns
kubectl taint node "${CLUSTER_NAME}-worker2" CriticalAddonsOnly=true:NoSchedule --overwrite
function is_denylisted {
if [[ ${SCRIPT_DENYLIST[*]} =~ (^|[[:space:]])$1($|[[:space:]]) ]]; then
return 0
fi
return 1
}
i=0
for assert_script in $ASSERTION_SCRIPTS; do
if is_denylisted $assert_script; then continue; fi
reset_cluster
START_FOR_QUERYING=$(date -u +"%Y-%m-%dT%TZ")
IMDS_PORT=$((i + 1338))
export IMDS_PORT
i=$((i + 1))
echo "======================================================================================================"
echo "🥑 Running assertion script $(basename "$assert_script")"
echo "======================================================================================================"
assert_start=$(date +%s)
$assert_script
assert_end=$(date +%s)
echo "⏰ Took $((assert_end - assert_start))sec"
POD_ID=$(get_nth_worker_pod || :)
kubectl logs "$POD_ID" --namespace kube-system || :
## Resets cluster to run another test on the same cluster
reset_cluster
echo "✅ Assertion test $assert_script PASSED! ✅"
done
echo "======================================================================================================"
echo "✅ All tests passed! ✅"
echo "======================================================================================================"