Skip to content

Commit ed546a1

Browse files
Merge pull request #18624 from deads2k/server-05-delayhandlercrash
Automatic merge from submit-queue (batch tested with PRs 18624, 18605). allow a panic to crash the server after a delay. This allows a server to crash, after a delay, once it experiences a panic. The delay allows us to break bootstrapping loops if we're panicking, but still making progress. It also allows us to configure multiple handlers, so we can have this plus sentry by doing `OPENSHIFT_ON_PANIC=["crash-after-delay:10m", "sentry:https://foo"]` @openshift/sig-master @smarterclayton @eparis @derekwaynecarr as discussed separately
2 parents 5535deb + f542f5a commit ed546a1

File tree

2 files changed

+77
-5
lines changed

2 files changed

+77
-5
lines changed

pkg/cmd/util/serviceability/panic.go

+54-5
Original file line numberDiff line numberDiff line change
@@ -1,33 +1,70 @@
11
package serviceability
22

33
import (
4+
"encoding/json"
45
"strings"
56
"time"
67

78
"github.com/golang/glog"
9+
810
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
911
)
1012

1113
// BehaviorOnPanic is a helper for setting the crash mode of OpenShift when a panic is caught.
1214
// It returns a function that should be the defer handler for the caller.
13-
func BehaviorOnPanic(mode string) (fn func()) {
14-
fn = func() {}
15+
func BehaviorOnPanic(modeString string) func() {
16+
modes := []string{}
17+
if err := json.Unmarshal([]byte(modeString), &modes); err != nil {
18+
return behaviorOnPanic(modeString)
19+
}
20+
21+
fns := []func(){}
22+
23+
for _, mode := range modes {
24+
fns = append(fns, behaviorOnPanic(mode))
25+
}
26+
27+
return func() {
28+
for _, fn := range fns {
29+
fn()
30+
}
31+
}
32+
}
33+
34+
func behaviorOnPanic(mode string) func() {
35+
doNothing := func() {}
36+
1537
switch {
1638
case mode == "crash":
1739
glog.Infof("Process will terminate as soon as a panic occurs.")
1840
utilruntime.ReallyCrash = true
41+
return doNothing
42+
43+
case strings.HasPrefix(mode, "crash-after-delay:"):
44+
delayDurationString := strings.TrimPrefix(mode, "crash-after-delay:")
45+
delayDuration, err := time.ParseDuration(delayDurationString)
46+
if err != nil {
47+
glog.Errorf("Unable to start crash-after-delay. Crashing immediately instead: %v", err)
48+
utilruntime.ReallyCrash = true
49+
return doNothing
50+
}
51+
glog.Infof("Process will terminate %v after a panic occurs.", delayDurationString)
52+
utilruntime.ReallyCrash = false
53+
utilruntime.PanicHandlers = append(utilruntime.PanicHandlers, crashOnDelay(delayDuration, delayDurationString))
54+
return doNothing
55+
1956
case strings.HasPrefix(mode, "sentry:"):
2057
url := strings.TrimPrefix(mode, "sentry:")
2158
m, err := NewSentryMonitor(url)
2259
if err != nil {
2360
glog.Errorf("Unable to start Sentry for panic tracing: %v", err)
24-
return
61+
return doNothing
2562
}
2663
glog.Infof("Process will log all panics and errors to Sentry.")
2764
utilruntime.ReallyCrash = false
2865
utilruntime.PanicHandlers = append(utilruntime.PanicHandlers, m.CapturePanic)
2966
utilruntime.ErrorHandlers = append(utilruntime.ErrorHandlers, m.CaptureError)
30-
fn = func() {
67+
return func() {
3168
if r := recover(); r != nil {
3269
m.CapturePanicAndWait(r, 2*time.Second)
3370
panic(r)
@@ -36,8 +73,20 @@ func BehaviorOnPanic(mode string) (fn func()) {
3673
case len(mode) == 0:
3774
// default panic behavior
3875
utilruntime.ReallyCrash = false
76+
return doNothing
77+
3978
default:
4079
glog.Errorf("Unrecognized panic behavior")
80+
return doNothing
81+
}
82+
}
83+
84+
func crashOnDelay(delay time.Duration, delayString string) func(interface{}) {
85+
return func(in interface{}) {
86+
go func() {
87+
glog.Errorf("Panic happened. Process will crash in %v.", delayString)
88+
time.Sleep(delay)
89+
panic(in)
90+
}()
4191
}
42-
return
4392
}
+23
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
package serviceability
2+
3+
import (
4+
"testing"
5+
"time"
6+
7+
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
8+
)
9+
10+
func TestPanicDelayingDeath(t *testing.T) {
11+
BehaviorOnPanic(`["crash-after-delay:10s"]`)
12+
13+
utilruntime.ReallyCrash = false
14+
go func() {
15+
defer utilruntime.HandleCrash()
16+
panic("not dead yet!")
17+
}()
18+
19+
select {
20+
case <-time.After(5 * time.Second):
21+
t.Log("beat death!")
22+
}
23+
}

0 commit comments

Comments
 (0)