Skip to content

Commit b743ca7

Browse files
Claudio Busse and Michael Shen (mjlshen) committed
fix: delayed MHC replacement of unreachable nodes
Co-authored-by: Michael Shen <[email protected]>
1 parent 0539a29 commit b743ca7

File tree

1 file changed

+12
-2
lines changed

1 file changed

+12
-2
lines changed

internal/controllers/machine/machine_controller.go

Lines changed: 12 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -670,8 +670,18 @@ func (r *Reconciler) drainNode(ctx context.Context, cluster *clusterv1.Cluster,
670670
}
671671

672672
if noderefutil.IsNodeUnreachable(node) {
673-
// When the node is unreachable and some pods are not evicted for as long as this timeout, we ignore them.
674-
drainer.SkipWaitForDeleteTimeoutSeconds = 60 * 5 // 5 minutes
673+
// Kubelet is unreachable, pods will never disappear.
674+
675+
// SkipWaitForDeleteTimeoutSeconds ensures the drain completes
676+
// even if pod objects are not deleted.
677+
drainer.SkipWaitForDeleteTimeoutSeconds = 1
678+
679+
// kube-apiserver sets the `deletionTimestamp` to a future date computed using the grace period.
680+
// We are effectively waiting for GracePeriodSeconds + SkipWaitForDeleteTimeoutSeconds.
681+
// Override the grace period of pods to reduce the time needed to skip them.
682+
drainer.GracePeriodSeconds = 1
683+
684+
log.V(5).Info("Node is unreachable, draining will ignore gracePeriod. PDBs are still honored.")
675685
}
676686

677687
if err := kubedrain.RunCordonOrUncordon(drainer, node, true); err != nil {

0 commit comments

Comments
 (0)