Skip to content

Commit a9ae325

Browse files
committed
Add --wait-for-node-deletion flag
When set, the flag causes the Machine controller to return the error and re-queue the Machine if deleting its node fails. Without the flag, reconciliation simply continues: the error is logged and recorded as an event, but otherwise ignored.
1 parent 0d01205 commit a9ae325

File tree

2 files changed

+15
-4
lines changed

2 files changed

+15
-4
lines changed

controllers/machine_controller.go

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,8 @@ type MachineReconciler struct {
8181
// WatchFilterValue is the label value used to filter events prior to reconciliation.
8282
WatchFilterValue string
8383

84+
WaitForNodeDeletion bool
85+
8486
controller controller.Controller
8587
recorder record.EventRecorder
8688
externalTracker external.ObjectTracker
@@ -385,9 +387,13 @@ func (r *MachineReconciler) reconcileDelete(ctx context.Context, cluster *cluste
385387
return true, nil
386388
})
387389
if waitErr != nil {
388-
log.Error(deleteNodeErr, "Timed out deleting node, moving on", "node", m.Status.NodeRef.Name)
390+
log.Error(deleteNodeErr, "Timed out deleting node", "node", m.Status.NodeRef.Name)
389391
conditions.MarkFalse(m, clusterv1.MachineNodeHealthyCondition, clusterv1.DeletionFailedReason, clusterv1.ConditionSeverityWarning, "")
390392
r.recorder.Eventf(m, corev1.EventTypeWarning, "FailedDeleteNode", "error deleting Machine's node: %v", deleteNodeErr)
393+
if r.WaitForNodeDeletion {
394+
return ctrl.Result{}, deleteNodeErr
395+
}
396+
log.Info("Waiting for node deletion is disabled, moving on", "node", m.Status.NodeRef.Name)
391397
}
392398
}
393399

main.go

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@ var (
8080
machinePoolConcurrency int
8181
clusterResourceSetConcurrency int
8282
machineHealthCheckConcurrency int
83+
waitForNodeDeletion bool
8384
syncPeriod time.Duration
8485
webhookPort int
8586
webhookCertDir string
@@ -160,6 +161,9 @@ func InitFlags(fs *pflag.FlagSet) {
160161
fs.IntVar(&machineHealthCheckConcurrency, "machinehealthcheck-concurrency", 10,
161162
"Number of machine health checks to process simultaneously")
162163

164+
fs.BoolVar(&waitForNodeDeletion, "wait-for-node-deletion", false,
165+
"If set, the Machine controller will postpone deletion of a Machine until the corresponding node is deleted in the Cluster's API.")
166+
163167
fs.DurationVar(&syncPeriod, "sync-period", 10*time.Minute,
164168
"The minimum interval at which watched resources are reconciled (e.g. 15m)")
165169

@@ -338,9 +342,10 @@ func setupReconcilers(ctx context.Context, mgr ctrl.Manager) {
338342
os.Exit(1)
339343
}
340344
if err := (&controllers.MachineReconciler{
341-
Client: mgr.GetClient(),
342-
Tracker: tracker,
343-
WatchFilterValue: watchFilterValue,
345+
Client: mgr.GetClient(),
346+
Tracker: tracker,
347+
WatchFilterValue: watchFilterValue,
348+
WaitForNodeDeletion: waitForNodeDeletion,
344349
}).SetupWithManager(ctx, mgr, concurrency(machineConcurrency)); err != nil {
345350
setupLog.Error(err, "unable to create controller", "controller", "Machine")
346351
os.Exit(1)

0 commit comments

Comments
 (0)