Skip to content

Commit ac0db7d

Browse files
Merge pull request #594 from ehashman/bz-1932097
Bug 1932097: UPSTREAM: 98571: kubelet: Stop probing a pod during graceful shutdown
2 parents 1ce30fc + a6b53d1 commit ac0db7d

File tree

2 files changed

+71
-17
lines changed

2 files changed

+71
-17
lines changed

pkg/kubelet/prober/worker.go

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,20 @@ func (w *worker) doProbe() (keepGoing bool) {
222222
w.pod.Spec.RestartPolicy != v1.RestartPolicyNever
223223
}
224224

225+
// Graceful shutdown of the pod.
226+
if w.pod.ObjectMeta.DeletionTimestamp != nil && (w.probeType == liveness || w.probeType == startup) {
227+
klog.V(3).Infof("Pod deletion requested, setting %v probe result to success: %v - %v",
228+
w.probeType.String(), format.Pod(w.pod), w.container.Name)
229+
if w.probeType == startup {
230+
klog.Warningf("Pod deletion requested before container has fully started: %v - %v",
231+
format.Pod(w.pod), w.container.Name)
232+
}
233+
// Set a last result to ensure quiet shutdown.
234+
w.resultsManager.Set(w.containerID, results.Success, w.pod)
235+
// Stop probing at this point.
236+
return false
237+
}
238+
225239
// Probe disabled for InitialDelaySeconds.
226240
if int32(time.Since(c.State.Running.StartedAt.Time).Seconds()) < w.spec.InitialDelaySeconds {
227241
return true
@@ -230,7 +244,7 @@ func (w *worker) doProbe() (keepGoing bool) {
230244
if c.Started != nil && *c.Started {
231245
// Stop probing for startup once container has started.
232246
if w.probeType == startup {
233-
return true
247+
return false
234248
}
235249
} else {
236250
// Disable other probes until container has started.

pkg/kubelet/prober/worker_test.go

Lines changed: 56 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -58,25 +58,47 @@ func TestDoProbe(t *testing.T) {
5858
failedStatus.Phase = v1.PodFailed
5959

6060
tests := []struct {
61-
probe v1.Probe
62-
podStatus *v1.PodStatus
63-
expectContinue bool
64-
expectSet bool
65-
expectedResult results.Result
61+
probe v1.Probe
62+
podStatus *v1.PodStatus
63+
expectContinue map[string]bool
64+
expectSet bool
65+
expectedResult results.Result
66+
setDeletionTimestamp bool
6667
}{
6768
{ // No status.
68-
expectContinue: true,
69+
expectContinue: map[string]bool{
70+
liveness.String(): true,
71+
readiness.String(): true,
72+
startup.String(): true,
73+
},
6974
},
7075
{ // Pod failed
7176
podStatus: &failedStatus,
7277
},
78+
{ // Pod deletion
79+
podStatus: &runningStatus,
80+
setDeletionTimestamp: true,
81+
expectSet: true,
82+
expectContinue: map[string]bool{
83+
readiness.String(): true,
84+
},
85+
expectedResult: results.Success,
86+
},
7387
{ // No container status
74-
podStatus: &otherStatus,
75-
expectContinue: true,
88+
podStatus: &otherStatus,
89+
expectContinue: map[string]bool{
90+
liveness.String(): true,
91+
readiness.String(): true,
92+
startup.String(): true,
93+
},
7694
},
7795
{ // Container waiting
78-
podStatus: &pendingStatus,
79-
expectContinue: true,
96+
podStatus: &pendingStatus,
97+
expectContinue: map[string]bool{
98+
liveness.String(): true,
99+
readiness.String(): true,
100+
startup.String(): true,
101+
},
80102
expectSet: true,
81103
expectedResult: results.Failure,
82104
},
@@ -86,8 +108,12 @@ func TestDoProbe(t *testing.T) {
86108
expectedResult: results.Failure,
87109
},
88110
{ // Probe successful.
89-
podStatus: &runningStatus,
90-
expectContinue: true,
111+
podStatus: &runningStatus,
112+
expectContinue: map[string]bool{
113+
liveness.String(): true,
114+
readiness.String(): true,
115+
startup.String(): true,
116+
},
91117
expectSet: true,
92118
expectedResult: results.Success,
93119
},
@@ -96,7 +122,11 @@ func TestDoProbe(t *testing.T) {
96122
probe: v1.Probe{
97123
InitialDelaySeconds: -100,
98124
},
99-
expectContinue: true,
125+
expectContinue: map[string]bool{
126+
liveness.String(): true,
127+
readiness.String(): true,
128+
startup.String(): true,
129+
},
100130
expectSet: true,
101131
expectedResult: results.Success,
102132
},
@@ -107,8 +137,12 @@ func TestDoProbe(t *testing.T) {
107137
if test.podStatus != nil {
108138
m.statusManager.SetPodStatus(w.pod, *test.podStatus)
109139
}
110-
if c := w.doProbe(); c != test.expectContinue {
111-
t.Errorf("[%s-%d] Expected continue to be %v but got %v", probeType, i, test.expectContinue, c)
140+
if test.setDeletionTimestamp {
141+
now := metav1.Now()
142+
w.pod.ObjectMeta.DeletionTimestamp = &now
143+
}
144+
if c := w.doProbe(); c != test.expectContinue[probeType.String()] {
145+
t.Errorf("[%s-%d] Expected continue to be %v but got %v", probeType, i, test.expectContinue[probeType.String()], c)
112146
}
113147
result, ok := resultsManager(m, probeType).Get(testContainerID)
114148
if ok != test.expectSet {
@@ -299,6 +333,12 @@ func expectContinue(t *testing.T, w *worker, c bool, msg string) {
299333
}
300334
}
301335

336+
func expectStop(t *testing.T, w *worker, c bool, msg string) {
337+
if c {
338+
t.Errorf("[%s - %s] Expected to stop, but did not", w.probeType, msg)
339+
}
340+
}
341+
302342
func resultsManager(m *manager, probeType probeType) results.Manager {
303343
switch probeType {
304344
case readiness:
@@ -468,6 +508,6 @@ func TestStartupProbeDisabledByStarted(t *testing.T) {
468508
// startupProbe fails, but is disabled
469509
m.prober.exec = fakeExecProber{probe.Failure, nil}
470510
msg = "Started, probe failure, result success"
471-
expectContinue(t, w, w.doProbe(), msg)
511+
expectStop(t, w, w.doProbe(), msg)
472512
expectResult(t, w, results.Success, msg)
473513
}

0 commit comments

Comments
 (0)