
Commit ca54369

chrischdi authored and mquhuy committed
✨ machine: Introduce Deletion status field and add timestamps for drain and volumeDetach instead of using the condition (kubernetes-sigs#11166)
* machine: Introduce Deletion status field and add timestamps for drain and volumeDetach instead of using the condition
* fix tests
* make generate
* review fixes
* fix openapi gen
* review fixes
* fix
1 parent 33bc162 · commit ca54369

10 files changed (+150, −55 lines)

api/v1beta1/machine_types.go (+22)
@@ -230,10 +230,32 @@ type MachineStatus struct {
 	// Conditions defines current service state of the Machine.
 	// +optional
 	Conditions Conditions `json:"conditions,omitempty"`
+
+	// deletion contains information relating to removal of the Machine.
+	// Only present when the Machine has a deletionTimestamp and drain or wait for volume detach started.
+	// +optional
+	Deletion *MachineDeletionStatus `json:"deletion,omitempty"`
 }
 
 // ANCHOR_END: MachineStatus
 
+// MachineDeletionStatus is the deletion state of the Machine.
+type MachineDeletionStatus struct {
+	// nodeDrainStartTime is the time when the drain of the node started and is used to determine
+	// if the NodeDrainTimeout is exceeded.
+	// Only present when the Machine has a deletionTimestamp and draining the node had been started.
+	// +optional
+	NodeDrainStartTime *metav1.Time `json:"nodeDrainStartTime,omitempty"`
+
+	// waitForNodeVolumeDetachStartTime is the time when waiting for volume detachment started
+	// and is used to determine if the NodeVolumeDetachTimeout is exceeded.
+	// Detaching volumes from nodes is usually done by CSI implementations and the current state
+	// is observed from the node's `.Status.VolumesAttached` field.
+	// Only present when the Machine has a deletionTimestamp and waiting for volume detachments had been started.
+	// +optional
+	WaitForNodeVolumeDetachStartTime *metav1.Time `json:"waitForNodeVolumeDetachStartTime,omitempty"`
+}
+
 // SetTypedPhase sets the Phase field to the string representation of MachinePhase.
 func (m *MachineStatus) SetTypedPhase(p MachinePhase) {
 	m.Phase = string(p)
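For API consumers, status.deletion behaves like any other optional status field: both the Deletion pointer and each timestamp must be nil-checked before use. A minimal sketch of reading the new field (the remainingDrainGrace helper is illustrative, not part of this commit):

package main

import (
	"fmt"
	"time"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
)

// remainingDrainGrace is a hypothetical helper: it reports how much of the
// configured NodeDrainTimeout is left, and false when no timeout applies or
// draining has not started yet.
func remainingDrainGrace(m *clusterv1.Machine) (time.Duration, bool) {
	if m.Spec.NodeDrainTimeout == nil || m.Spec.NodeDrainTimeout.Duration <= 0 {
		return 0, false
	}
	if m.Status.Deletion == nil || m.Status.Deletion.NodeDrainStartTime == nil {
		return 0, false
	}
	elapsed := time.Since(m.Status.Deletion.NodeDrainStartTime.Time)
	return m.Spec.NodeDrainTimeout.Duration - elapsed, true
}

func main() {
	// A Machine whose drain started two minutes ago, with a five-minute timeout.
	m := &clusterv1.Machine{
		Spec: clusterv1.MachineSpec{
			NodeDrainTimeout: &metav1.Duration{Duration: 5 * time.Minute},
		},
		Status: clusterv1.MachineStatus{
			Deletion: &clusterv1.MachineDeletionStatus{
				NodeDrainStartTime: &metav1.Time{Time: time.Now().Add(-2 * time.Minute)},
			},
		},
	}
	if left, ok := remainingDrainGrace(m); ok {
		fmt.Printf("drain grace remaining: %s\n", left.Round(time.Second)) // ~3m0s
	}
}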

api/v1beta1/zz_generated.deepcopy.go (+28; generated file, diff not rendered)
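The new deepcopy code is mechanical: MachineDeletionStatus holds two *metav1.Time pointers, so deepcopy-gen emits per-field pointer copies. Roughly, reconstructed from the standard deepcopy-gen pattern rather than copied from the generated file:

// DeepCopyInto copies the receiver into out, deep-copying both pointer fields
// so that mutating the copy cannot affect the original.
func (in *MachineDeletionStatus) DeepCopyInto(out *MachineDeletionStatus) {
	*out = *in
	if in.NodeDrainStartTime != nil {
		in, out := &in.NodeDrainStartTime, &out.NodeDrainStartTime
		*out = (*in).DeepCopy()
	}
	if in.WaitForNodeVolumeDetachStartTime != nil {
		in, out := &in.WaitForNodeVolumeDetachStartTime, &out.WaitForNodeVolumeDetachStartTime
		*out = (*in).DeepCopy()
	}
}

// DeepCopy allocates a new MachineDeletionStatus and copies the receiver into it.
func (in *MachineDeletionStatus) DeepCopy() *MachineDeletionStatus {
	if in == nil {
		return nil
	}
	out := new(MachineDeletionStatus)
	in.DeepCopyInto(out)
	return out
}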

api/v1beta1/zz_generated.openapi.go (+35, −1; generated file, diff not rendered)

config/crd/bases/cluster.x-k8s.io_machines.yaml (+22; generated file, diff not rendered)

internal/apis/core/v1alpha3/conversion.go (+1)
@@ -101,6 +101,7 @@ func (src *Machine) ConvertTo(dstRaw conversion.Hub) error {
 	dst.Spec.NodeVolumeDetachTimeout = restored.Spec.NodeVolumeDetachTimeout
 	dst.Status.NodeInfo = restored.Status.NodeInfo
 	dst.Status.CertificatesExpiryDate = restored.Status.CertificatesExpiryDate
+	dst.Status.Deletion = restored.Status.Deletion
 	return nil
 }
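Status.Deletion has no v1alpha3 equivalent, so it is carried through Cluster API's usual lossless round-trip mechanism: the hub object is stashed in an annotation on down-conversion and unmarshalled back in ConvertTo. A condensed sketch of that pattern, assuming the MarshalData/UnmarshalData helpers from sigs.k8s.io/cluster-api/util/conversion:

// ConvertFrom (hub -> v1alpha3): convert, then stash the full hub object in an
// annotation so fields v1alpha3 cannot represent are not lost.
func (dst *Machine) ConvertFrom(srcRaw conversion.Hub) error {
	src := srcRaw.(*clusterv1.Machine)
	if err := Convert_v1beta1_Machine_To_v1alpha3_Machine(src, dst, nil); err != nil {
		return err
	}
	return utilconversion.MarshalData(src, dst)
}

// ConvertTo (v1alpha3 -> hub): convert, then restore the stashed fields.
func (src *Machine) ConvertTo(dstRaw conversion.Hub) error {
	dst := dstRaw.(*clusterv1.Machine)
	if err := Convert_v1alpha3_Machine_To_v1beta1_Machine(src, dst, nil); err != nil {
		return err
	}
	restored := &clusterv1.Machine{}
	if ok, err := utilconversion.UnmarshalData(src, restored); err != nil || !ok {
		return err
	}
	// ... existing restored fields ...
	dst.Status.Deletion = restored.Status.Deletion // the line this commit adds
	return nil
}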

internal/apis/core/v1alpha3/zz_generated.conversion.go (+1; generated file, diff not rendered)

internal/apis/core/v1alpha4/conversion.go (+1)
@@ -190,6 +190,7 @@ func (src *Machine) ConvertTo(dstRaw conversion.Hub) error {
 	dst.Spec.NodeDeletionTimeout = restored.Spec.NodeDeletionTimeout
 	dst.Status.CertificatesExpiryDate = restored.Status.CertificatesExpiryDate
 	dst.Spec.NodeVolumeDetachTimeout = restored.Spec.NodeVolumeDetachTimeout
+	dst.Status.Deletion = restored.Status.Deletion
 	return nil
 }

internal/apis/core/v1alpha4/zz_generated.conversion.go (+1; generated file, diff not rendered)

internal/controllers/machine/machine_controller.go (+27, −18)
@@ -32,6 +32,7 @@ import (
 	"k8s.io/apimachinery/pkg/util/wait"
 	"k8s.io/client-go/tools/record"
 	"k8s.io/klog/v2"
+	"k8s.io/utils/ptr"
 	ctrl "sigs.k8s.io/controller-runtime"
 	"sigs.k8s.io/controller-runtime/pkg/builder"
 	"sigs.k8s.io/controller-runtime/pkg/client"
@@ -388,13 +389,18 @@ func (r *Reconciler) reconcileDelete(ctx context.Context, cluster *clusterv1.Cluster
 		return ctrl.Result{}, err
 	}
 
-	// The DrainingSucceededCondition never exists before the node is drained for the first time,
-	// so its transition time can be used to record the first time draining.
-	// This `if` condition prevents the transition time to be changed more than once.
+	// The DrainingSucceededCondition never exists before the node is drained for the first time.
 	if conditions.Get(m, clusterv1.DrainingSucceededCondition) == nil {
 		conditions.MarkFalse(m, clusterv1.DrainingSucceededCondition, clusterv1.DrainingReason, clusterv1.ConditionSeverityInfo, "Draining the node before deletion")
 	}
 
+	if m.Status.Deletion == nil {
+		m.Status.Deletion = &clusterv1.MachineDeletionStatus{}
+	}
+	if m.Status.Deletion.NodeDrainStartTime == nil {
+		m.Status.Deletion.NodeDrainStartTime = ptr.To(metav1.Now())
+	}
+
 	if err := patchMachine(ctx, patchHelper, m); err != nil {
 		return ctrl.Result{}, errors.Wrap(err, "failed to patch Machine")
 	}
@@ -418,13 +424,18 @@ func (r *Reconciler) reconcileDelete(ctx context.Context, cluster *clusterv1.Cluster
 	// volumes are detached before proceeding to delete the Node.
 	// In case the node is unreachable, the detachment is skipped.
 	if r.isNodeVolumeDetachingAllowed(m) {
-		// The VolumeDetachSucceededCondition never exists before we wait for volume detachment for the first time,
-		// so its transition time can be used to record the first time we wait for volume detachment.
-		// This `if` condition prevents the transition time to be changed more than once.
+		// The VolumeDetachSucceededCondition never exists before we wait for volume detachment for the first time.
 		if conditions.Get(m, clusterv1.VolumeDetachSucceededCondition) == nil {
 			conditions.MarkFalse(m, clusterv1.VolumeDetachSucceededCondition, clusterv1.WaitingForVolumeDetachReason, clusterv1.ConditionSeverityInfo, "Waiting for node volumes to be detached")
 		}
 
+		if m.Status.Deletion == nil {
+			m.Status.Deletion = &clusterv1.MachineDeletionStatus{}
+		}
+		if m.Status.Deletion.WaitForNodeVolumeDetachStartTime == nil {
+			m.Status.Deletion.WaitForNodeVolumeDetachStartTime = ptr.To(metav1.Now())
+		}
+
 		if ok, err := r.shouldWaitForNodeVolumes(ctx, cluster, m.Status.NodeRef.Name); ok || err != nil {
 			if err != nil {
 				r.recorder.Eventf(m, corev1.EventTypeWarning, "FailedWaitForVolumeDetach", "error waiting for node volumes detaching, Machine's node %q: %v", m.Status.NodeRef.Name, err)
@@ -540,38 +551,36 @@ func (r *Reconciler) isNodeVolumeDetachingAllowed(m *clusterv1.Machine) bool {
 
 func (r *Reconciler) nodeDrainTimeoutExceeded(machine *clusterv1.Machine) bool {
 	// if the NodeDrainTimeout type is not set by user
-	if machine.Spec.NodeDrainTimeout == nil || machine.Spec.NodeDrainTimeout.Seconds() <= 0 {
+	if machine.Status.Deletion == nil || machine.Spec.NodeDrainTimeout == nil || machine.Spec.NodeDrainTimeout.Seconds() <= 0 {
 		return false
 	}
 
-	// if the draining succeeded condition does not exist
-	if conditions.Get(machine, clusterv1.DrainingSucceededCondition) == nil {
+	// if the NodeDrainStartTime does not exist
+	if machine.Status.Deletion.NodeDrainStartTime == nil {
 		return false
 	}
 
 	now := time.Now()
-	firstTimeDrain := conditions.GetLastTransitionTime(machine, clusterv1.DrainingSucceededCondition)
-	diff := now.Sub(firstTimeDrain.Time)
+	diff := now.Sub(machine.Status.Deletion.NodeDrainStartTime.Time)
 	return diff.Seconds() >= machine.Spec.NodeDrainTimeout.Seconds()
 }
 
 // nodeVolumeDetachTimeoutExceeded returns False if either NodeVolumeDetachTimeout is set to nil or <=0 OR
-// VolumeDetachSucceededCondition is not set on the Machine. Otherwise returns true if the timeout is expired
-// since the last transition time of VolumeDetachSucceededCondition.
+// WaitForNodeVolumeDetachStartTime is not set on the Machine. Otherwise returns true if the timeout is expired
+// since the WaitForNodeVolumeDetachStartTime.
 func (r *Reconciler) nodeVolumeDetachTimeoutExceeded(machine *clusterv1.Machine) bool {
 	// if the NodeVolumeDetachTimeout type is not set by user
-	if machine.Spec.NodeVolumeDetachTimeout == nil || machine.Spec.NodeVolumeDetachTimeout.Seconds() <= 0 {
+	if machine.Status.Deletion == nil || machine.Spec.NodeVolumeDetachTimeout == nil || machine.Spec.NodeVolumeDetachTimeout.Seconds() <= 0 {
 		return false
 	}
 
-	// if the volume detaching succeeded condition does not exist
-	if conditions.Get(machine, clusterv1.VolumeDetachSucceededCondition) == nil {
+	// if the WaitForNodeVolumeDetachStartTime does not exist
+	if machine.Status.Deletion.WaitForNodeVolumeDetachStartTime == nil {
		return false
 	}
 
 	now := time.Now()
-	firstTimeDetach := conditions.GetLastTransitionTime(machine, clusterv1.VolumeDetachSucceededCondition)
-	diff := now.Sub(firstTimeDetach.Time)
+	diff := now.Sub(machine.Status.Deletion.WaitForNodeVolumeDetachStartTime.Time)
 	return diff.Seconds() >= machine.Spec.NodeVolumeDetachTimeout.Seconds()
 }
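The nil guards above make each start time write-once: the first reconcile after deletion stamps the field, and every later reconcile sees a non-nil value and leaves it alone, so the timeout is always measured from the first attempt rather than from the latest retry. A small standalone illustration of that invariant (setDrainStartTime is a hypothetical stand-in for the guarded assignment in reconcileDelete):

package main

import (
	"fmt"
	"time"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/utils/ptr"
	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
)

// setDrainStartTime mirrors the guarded assignment above: it only stamps
// NodeDrainStartTime when the field is still unset.
func setDrainStartTime(m *clusterv1.Machine) {
	if m.Status.Deletion == nil {
		m.Status.Deletion = &clusterv1.MachineDeletionStatus{}
	}
	if m.Status.Deletion.NodeDrainStartTime == nil {
		m.Status.Deletion.NodeDrainStartTime = ptr.To(metav1.Now())
	}
}

func main() {
	m := &clusterv1.Machine{}

	setDrainStartTime(m) // first reconcile after deletion: stamps the time
	first := m.Status.Deletion.NodeDrainStartTime

	time.Sleep(10 * time.Millisecond)
	setDrainStartTime(m) // later reconcile: must be a no-op

	fmt.Println("start time unchanged:", first == m.Status.Deletion.NodeDrainStartTime) // true
}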

internal/controllers/machine/machine_controller_test.go (+12, −36)
@@ -1386,12 +1386,8 @@ func TestIsNodeDrainedAllowed(t *testing.T) {
 			},
 
 			Status: clusterv1.MachineStatus{
-				Conditions: clusterv1.Conditions{
-					{
-						Type:               clusterv1.DrainingSucceededCondition,
-						Status:             corev1.ConditionFalse,
-						LastTransitionTime: metav1.Time{Time: time.Now().Add(-(time.Second * 70)).UTC()},
-					},
+				Deletion: &clusterv1.MachineDeletionStatus{
+					NodeDrainStartTime: &metav1.Time{Time: time.Now().Add(-(time.Second * 70)).UTC()},
 				},
 			},
 		},
@@ -1412,12 +1408,8 @@ func TestIsNodeDrainedAllowed(t *testing.T) {
 				NodeDrainTimeout: &metav1.Duration{Duration: time.Second * 60},
 			},
 			Status: clusterv1.MachineStatus{
-				Conditions: clusterv1.Conditions{
-					{
-						Type:               clusterv1.DrainingSucceededCondition,
-						Status:             corev1.ConditionFalse,
-						LastTransitionTime: metav1.Time{Time: time.Now().Add(-(time.Second * 30)).UTC()},
-					},
+				Deletion: &clusterv1.MachineDeletionStatus{
+					NodeDrainStartTime: &metav1.Time{Time: time.Now().Add(-(time.Second * 30)).UTC()},
 				},
 			},
 		},
@@ -1437,12 +1429,8 @@ func TestIsNodeDrainedAllowed(t *testing.T) {
 				Bootstrap: clusterv1.Bootstrap{DataSecretName: ptr.To("data")},
 			},
 			Status: clusterv1.MachineStatus{
-				Conditions: clusterv1.Conditions{
-					{
-						Type:               clusterv1.DrainingSucceededCondition,
-						Status:             corev1.ConditionFalse,
-						LastTransitionTime: metav1.Time{Time: time.Now().Add(-(time.Second * 1000)).UTC()},
-					},
+				Deletion: &clusterv1.MachineDeletionStatus{
+					NodeDrainStartTime: &metav1.Time{Time: time.Now().Add(-(time.Second * 1000)).UTC()},
 				},
 			},
 		},
@@ -1896,12 +1884,8 @@ func TestIsNodeVolumeDetachingAllowed(t *testing.T) {
 			},
 
 			Status: clusterv1.MachineStatus{
-				Conditions: clusterv1.Conditions{
-					{
-						Type:               clusterv1.VolumeDetachSucceededCondition,
-						Status:             corev1.ConditionFalse,
-						LastTransitionTime: metav1.Time{Time: time.Now().Add(-(time.Second * 60)).UTC()},
-					},
+				Deletion: &clusterv1.MachineDeletionStatus{
+					WaitForNodeVolumeDetachStartTime: &metav1.Time{Time: time.Now().Add(-(time.Second * 60)).UTC()},
 				},
 			},
 		},
@@ -1922,12 +1906,8 @@ func TestIsNodeVolumeDetachingAllowed(t *testing.T) {
 				NodeVolumeDetachTimeout: &metav1.Duration{Duration: time.Second * 60},
 			},
 			Status: clusterv1.MachineStatus{
-				Conditions: clusterv1.Conditions{
-					{
-						Type:               clusterv1.VolumeDetachSucceededCondition,
-						Status:             corev1.ConditionFalse,
-						LastTransitionTime: metav1.Time{Time: time.Now().Add(-(time.Second * 30)).UTC()},
-					},
+				Deletion: &clusterv1.MachineDeletionStatus{
+					WaitForNodeVolumeDetachStartTime: &metav1.Time{Time: time.Now().Add(-(time.Second * 30)).UTC()},
 				},
 			},
 		},
@@ -1947,12 +1927,8 @@ func TestIsNodeVolumeDetachingAllowed(t *testing.T) {
 				Bootstrap: clusterv1.Bootstrap{DataSecretName: ptr.To("data")},
 			},
 			Status: clusterv1.MachineStatus{
-				Conditions: clusterv1.Conditions{
-					{
-						Type:               clusterv1.VolumeDetachSucceededCondition,
-						Status:             corev1.ConditionFalse,
-						LastTransitionTime: metav1.Time{Time: time.Now().Add(-(time.Second * 1000)).UTC()},
-					},
+				Deletion: &clusterv1.MachineDeletionStatus{
+					WaitForNodeVolumeDetachStartTime: &metav1.Time{Time: time.Now().Add(-(time.Second * 1000)).UTC()},
 				},
 			},
 		},
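With the fixtures above, timeout behaviour is driven entirely by how far in the past the start time is planted. A condensed table-driven sketch of the same idea, inlining the elapsed-vs-timeout comparison rather than calling the unexported controller method (test name and structure are illustrative, not lifted from the test file):

func TestDrainTimeoutFromStartTime(t *testing.T) {
	tests := []struct {
		name     string
		timeout  time.Duration
		started  time.Duration // how long ago the drain started
		exceeded bool
	}{
		{name: "past the deadline", timeout: 60 * time.Second, started: 70 * time.Second, exceeded: true},
		{name: "within the deadline", timeout: 60 * time.Second, started: 30 * time.Second, exceeded: false},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			g := NewWithT(t)
			machine := &clusterv1.Machine{
				Spec: clusterv1.MachineSpec{
					NodeDrainTimeout: &metav1.Duration{Duration: tt.timeout},
				},
				Status: clusterv1.MachineStatus{
					Deletion: &clusterv1.MachineDeletionStatus{
						NodeDrainStartTime: &metav1.Time{Time: time.Now().Add(-tt.started)},
					},
				},
			}
			elapsed := time.Since(machine.Status.Deletion.NodeDrainStartTime.Time)
			g.Expect(elapsed >= machine.Spec.NodeDrainTimeout.Duration).To(Equal(tt.exceeded))
		})
	}
}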
