Skip to content

Commit 21fe7f2

Browse files
committed
Introduce count thresholds for unrecoverable pod events
Enable more fine-grained management of unrecoverable pod events during workspace startup by having different thresholds for how many times an event can be seen before it is considered fatal. Signed-off-by: Angel Misevski <[email protected]>
1 parent 56d187b commit 21fe7f2

File tree

1 file changed

+19
-9
lines changed

1 file changed

+19
-9
lines changed

pkg/provision/workspace/deployment.go

+19-9
Original file line numberDiff line numberDiff line change
@@ -51,12 +51,16 @@ var containerFailureStateReasons = []string{
5151
"RunContainerError",
5252
}
5353

54-
var unrecoverablePodEventReasons = []string{
55-
"FailedPostStartHook",
56-
"FailedMount",
57-
"FailedScheduling",
58-
"FailedCreate",
59-
"ReplicaSetCreateError",
54+
// unrecoverablePodEventReasons contains Kubernetes events that should fail workspace startup
55+
// if they occur for a workspace pod. Events are stored as a map with event reasons as keys
56+
// and values representing the number of times an event must be seen (event count >= value) before it is considered
57+
// unrecoverable.
58+
var unrecoverablePodEventReasons = map[string]int32{
59+
"FailedPostStartHook": 1,
60+
"FailedMount": 3,
61+
"FailedScheduling": 1,
62+
"FailedCreate": 1,
63+
"ReplicaSetCreateError": 1,
6064
}
6165

6266
var unrecoverableDeploymentConditionReasons = []string{
@@ -473,9 +477,15 @@ func checkPodEvents(pod *corev1.Pod, workspaceID string, clusterAPI sync.Cluster
473477
continue
474478
}
475479

476-
for _, fatalEv := range unrecoverablePodEventReasons {
477-
if ev.Reason == fatalEv && !checkIfUnrecoverableEventIgnored(ev.Reason) {
478-
return fmt.Sprintf("Detected unrecoverable event %s: %s", ev.Reason, ev.Message), nil
480+
if maxCount, isUnrecoverableEvent := unrecoverablePodEventReasons[ev.Reason]; isUnrecoverableEvent {
481+
if !checkIfUnrecoverableEventIgnored(ev.Reason) && ev.Count >= maxCount {
482+
var msg string
483+
if ev.Count > 1 {
484+
msg = fmt.Sprintf("Detected unrecoverable event %s %d times: %s", ev.Reason, ev.Count, ev.Message)
485+
} else {
486+
msg = fmt.Sprintf("Detected unrecoverable event %s: %s", ev.Reason, ev.Message)
487+
}
488+
return msg, nil
479489
}
480490
}
481491
}

0 commit comments

Comments
 (0)