Skip to content

Commit 8bd926d

Browse files
committed
Implement Machine Deleting condition
1 parent ff13a64 commit 8bd926d

20 files changed

+1314
-322
lines changed

api/v1beta1/machine_types.go

+56
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,9 @@ const (
196196
// MachineNodeConditionNotYetReportedV1Beta2Reason surfaces when a Machine's Node doesn't have a condition reported yet.
197197
MachineNodeConditionNotYetReportedV1Beta2Reason = "NodeConditionNotYetReported"
198198

199+
// MachineNodeInternalErrorV1Beta2Reason surfaces unexpected failures when reading a Node object.
200+
MachineNodeInternalErrorV1Beta2Reason = InternalErrorV1Beta2Reason
201+
199202
// MachineNodeDoesNotExistV1Beta2Reason surfaces when the node hosted on the machine does not exist.
200203
// Note: this could happen when creating the machine. However, this state should be treated as an error if it lasts indefinitely.
201204
MachineNodeDoesNotExistV1Beta2Reason = ObjectDoesNotExistV1Beta2Reason
@@ -204,6 +207,11 @@ const (
204207
// Note: controllers can't identify if the Node was deleted by the controller itself, e.g.
205208
// during the deletion workflow, or by a user.
206209
MachineNodeDeletedV1Beta2Reason = ObjectDeletedV1Beta2Reason
210+
211+
// MachineNodeRemoteConnectionFailedV1Beta2Reason surfaces that the remote connection failed.
212+
// If the remote connection probe failed for longer than the remote conditions grace period,
213+
// this reason is used when setting NodeHealthy and NodeReady conditions to `Unknown`.
214+
MachineNodeRemoteConnectionFailedV1Beta2Reason = RemoteConnectionFailedV1Beta2Reason
207215
)
208216

209217
// Machine's HealthCheckSucceeded condition and corresponding reasons that will be used in v1Beta2 API version.
@@ -271,6 +279,54 @@ const (
271279
const (
272280
// MachineDeletingV1Beta2Condition surfaces details about progress in the machine deletion workflow.
273281
MachineDeletingV1Beta2Condition = DeletingV1Beta2Condition
282+
283+
// MachineDeletingDeletingV1Beta2Reason surfaces when the Machine is deleting.
284+
// This reason is only used for the MachineDeletingV1Beta2Condition when calculating the
285+
// Ready condition when the deletionTimestamp on a Machine is set.
286+
MachineDeletingDeletingV1Beta2Reason = DeletingV1Beta2Condition
287+
288+
// MachineDeletingDeletionTimestampNotSetV1Beta2Reason surfaces when the Machine is not deleting because the
289+
// DeletionTimestamp is not set.
290+
MachineDeletingDeletionTimestampNotSetV1Beta2Reason = DeletionTimestampNotSetV1Beta2Reason
291+
292+
// MachineDeletingDeletionTimestampSetV1Beta2Reason surfaces when the Machine is deleting because the
293+
// DeletionTimestamp is set. This reason is used if none of the more specific reasons apply.
294+
MachineDeletingDeletionTimestampSetV1Beta2Reason = DeletionTimestampSetV1Beta2Reason
295+
296+
// MachineDeletingWaitingForPreDrainHookV1Beta2Reason surfaces when the Machine deletion
297+
// waits for pre-drain hooks to complete. I.e. it waits until there are no annotations
298+
// with the `pre-drain.delete.hook.machine.cluster.x-k8s.io` prefix on the Machine anymore.
299+
MachineDeletingWaitingForPreDrainHookV1Beta2Reason = "WaitingForPreDrainHook"
300+
301+
// MachineDeletingDrainingNodeV1Beta2Reason surfaces when the Machine deletion is draining the Node.
302+
MachineDeletingDrainingNodeV1Beta2Reason = "DrainingNode"
303+
304+
// MachineDeletingWaitingForVolumeDetachV1Beta2Reason surfaces when the Machine deletion is
305+
// waiting for volumes to detach from the Node.
306+
MachineDeletingWaitingForVolumeDetachV1Beta2Reason = "WaitingForVolumeDetach"
307+
308+
// MachineDeletingWaitingForPreTerminateHookV1Beta2Reason surfaces when the Machine deletion
309+
// waits for pre-terminate hooks to complete. I.e. it waits until there are no annotations
310+
// with the `pre-terminate.delete.hook.machine.cluster.x-k8s.io` prefix on the Machine anymore.
311+
MachineDeletingWaitingForPreTerminateHookV1Beta2Reason = "WaitingForPreTerminateHook"
312+
313+
// MachineDeletingWaitingForInfrastructureDeletionV1Beta2Reason surfaces when the Machine deletion
314+
// waits for InfraMachine deletion to complete.
315+
MachineDeletingWaitingForInfrastructureDeletionV1Beta2Reason = "WaitingForInfrastructureDeletion"
316+
317+
// MachineDeletingWaitingForBootstrapDeletionV1Beta2Reason surfaces when the Machine deletion
318+
// waits for BootstrapConfig deletion to complete.
319+
MachineDeletingWaitingForBootstrapDeletionV1Beta2Reason = "WaitingForBootstrapDeletion"
320+
321+
// MachineDeletingDeletingNodeV1Beta2Reason surfaces when the Machine deletion is
322+
// deleting the Node.
323+
MachineDeletingDeletingNodeV1Beta2Reason = "DeletingNode"
324+
325+
// MachineDeletingDeletionCompletedV1Beta2Reason surfaces when the Machine deletion has been completed.
326+
// This reason is set right after the `machine.cluster.x-k8s.io` finalizer is removed.
327+
// This means that the object will go away (i.e. be removed from etcd), except if there are other
328+
// finalizers on the Machine object.
329+
MachineDeletingDeletionCompletedV1Beta2Reason = DeletionCompletedV1Beta2Reason
274330
)
275331

276332
// Machine's Paused condition and corresponding reasons that will be used in v1Beta2 API version.

api/v1beta1/v1beta2_condition_consts.go

+19
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,25 @@ const (
112112

113113
// PausedV1Beta2Reason surfaces when an object is paused.
114114
PausedV1Beta2Reason = "Paused"
115+
116+
// RemoteConnectionFailedV1Beta2Reason surfaces that the remote connection failed.
117+
// This is typically used when setting remote conditions (e.g. `NodeHealthy`) to `Unknown`
118+
// after the remote connection probe didn't succeed for the remote conditions grace period.
119+
RemoteConnectionFailedV1Beta2Reason = "RemoteConnectionFailed"
120+
121+
// DeletionTimestampNotSetV1Beta2Reason surfaces when an object is not deleting because the
122+
// DeletionTimestamp is not set.
123+
DeletionTimestampNotSetV1Beta2Reason = "DeletionTimestampNotSet"
124+
125+
// DeletionTimestampSetV1Beta2Reason surfaces when an object is deleting because the
126+
// DeletionTimestamp is set. This reason is used if none of the more specific reasons apply.
127+
DeletionTimestampSetV1Beta2Reason = "DeletionTimestampSet"
128+
129+
// DeletionCompletedV1Beta2Reason surfaces when the deletion process has been completed.
130+
// This reason is set right after the corresponding finalizer is removed.
131+
// This means that the object will go away (i.e. be removed from etcd), except if there are other
132+
// finalizers on the object.
133+
DeletionCompletedV1Beta2Reason = "DeletionCompleted"
115134
)
116135

117136
// Conditions that will be used for the MachineSet object in v1Beta2 API version.

controllers/alias.go

+7-4
Original file line numberDiff line numberDiff line change
@@ -70,14 +70,17 @@ type MachineReconciler struct {
7070

7171
// WatchFilterValue is the label value used to filter events prior to reconciliation.
7272
WatchFilterValue string
73+
74+
RemoteConditionsGracePeriod time.Duration
7375
}
7476

7577
func (r *MachineReconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, options controller.Options) error {
7678
return (&machinecontroller.Reconciler{
77-
Client: r.Client,
78-
APIReader: r.APIReader,
79-
ClusterCache: r.ClusterCache,
80-
WatchFilterValue: r.WatchFilterValue,
79+
Client: r.Client,
80+
APIReader: r.APIReader,
81+
ClusterCache: r.ClusterCache,
82+
WatchFilterValue: r.WatchFilterValue,
83+
RemoteConditionsGracePeriod: r.RemoteConditionsGracePeriod,
8184
}).SetupWithManager(ctx, mgr, options)
8285
}
8386

internal/controllers/clusterclass/clusterclass_controller.go

-1
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,6 @@ func (r *Reconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, opt
7373
predicateLog := ctrl.LoggerFrom(ctx).WithValues("controller", "clusterclass")
7474
err := ctrl.NewControllerManagedBy(mgr).
7575
For(&clusterv1.ClusterClass{}).
76-
Named("clusterclass").
7776
WithOptions(options).
7877
Watches(
7978
&runtimev1.ExtensionConfig{},

internal/controllers/machine/drain/cache.go

-97
This file was deleted.

internal/controllers/machine/drain/drain.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -301,12 +301,12 @@ func (r EvictionResult) DrainCompleted() bool {
301301
}
302302

303303
// ConditionMessage returns a condition message for the case where a drain is not completed.
304-
func (r EvictionResult) ConditionMessage() string {
304+
func (r EvictionResult) ConditionMessage(nodeDrainStartTime *metav1.Time) string {
305305
if r.DrainCompleted() {
306306
return ""
307307
}
308308

309-
conditionMessage := "Drain not completed yet:"
309+
conditionMessage := fmt.Sprintf("Drain not completed yet (started at %s):", nodeDrainStartTime.Format(time.RFC3339))
310310
if len(r.PodsDeletionTimestampSet) > 0 {
311311
conditionMessage = fmt.Sprintf("%s\n* Pods with deletionTimestamp that still exist: %s",
312312
conditionMessage, PodListToString(r.PodsDeletionTimestampSet, 5))

internal/controllers/machine/drain/drain_test.go

+9-4
Original file line numberDiff line numberDiff line change
@@ -777,6 +777,8 @@ func TestEvictPods(t *testing.T) {
777777
}
778778

779779
func TestEvictionResult_ConditionMessage(t *testing.T) {
780+
g := NewWithT(t)
781+
780782
tests := []struct {
781783
name string
782784
evictionResult EvictionResult
@@ -819,7 +821,7 @@ func TestEvictionResult_ConditionMessage(t *testing.T) {
819821
},
820822
},
821823
},
822-
wantConditionMessage: `Drain not completed yet:
824+
wantConditionMessage: `Drain not completed yet (started at 2024-10-09T16:13:59Z):
823825
* Pods with deletionTimestamp that still exist: pod-2-deletionTimestamp-set-1, pod-3-to-trigger-eviction-successfully-1
824826
* Pods with eviction failed:
825827
* Cannot evict pod as it would violate the pod's disruption budget. The disruption budget pod-5-pdb needs 20 healthy pods and has 20 currently: pod-5-to-trigger-eviction-pdb-violated-1
@@ -949,7 +951,7 @@ func TestEvictionResult_ConditionMessage(t *testing.T) {
949951
},
950952
},
951953
},
952-
wantConditionMessage: `Drain not completed yet:
954+
wantConditionMessage: `Drain not completed yet (started at 2024-10-09T16:13:59Z):
953955
* Pods with deletionTimestamp that still exist: pod-2-deletionTimestamp-set-1, pod-2-deletionTimestamp-set-2, pod-2-deletionTimestamp-set-3, pod-3-to-trigger-eviction-successfully-1, pod-3-to-trigger-eviction-successfully-2, ... (2 more)
954956
* Pods with eviction failed:
955957
* Cannot evict pod as it would violate the pod's disruption budget. The disruption budget pod-5-pdb needs 20 healthy pods and has 20 currently: pod-5-to-trigger-eviction-pdb-violated-1, pod-5-to-trigger-eviction-pdb-violated-2, pod-5-to-trigger-eviction-pdb-violated-3, ... (3 more)
@@ -1024,7 +1026,7 @@ func TestEvictionResult_ConditionMessage(t *testing.T) {
10241026
},
10251027
},
10261028
},
1027-
wantConditionMessage: `Drain not completed yet:
1029+
wantConditionMessage: `Drain not completed yet (started at 2024-10-09T16:13:59Z):
10281030
* Pods with eviction failed:
10291031
* some other error 1: pod-1-to-trigger-eviction-some-other-error
10301032
* some other error 2: pod-2-to-trigger-eviction-some-other-error
@@ -1035,11 +1037,14 @@ func TestEvictionResult_ConditionMessage(t *testing.T) {
10351037
},
10361038
}
10371039

1040+
nodeDrainStartTime, err := time.Parse(time.RFC3339, "2024-10-09T16:13:59Z")
1041+
g.Expect(err).ToNot(HaveOccurred())
1042+
10381043
for _, tt := range tests {
10391044
t.Run(tt.name, func(t *testing.T) {
10401045
g := NewWithT(t)
10411046

1042-
g.Expect(tt.evictionResult.ConditionMessage()).To(Equal(tt.wantConditionMessage))
1047+
g.Expect(tt.evictionResult.ConditionMessage(&metav1.Time{Time: nodeDrainStartTime})).To(Equal(tt.wantConditionMessage))
10431048
})
10441049
}
10451050
}

0 commit comments

Comments
 (0)