
Commit ba6758b

Ensure infrastructure is destroyed before deleting the node
In an edge case where we delete an unreachable node, and with it its pods in etcd, while a stateful process is still running on the underlying host, we would immediately free up the pod name in the apiserver. "This would let the StatefulSet controller create a replacement Pod with that same identity; this can lead to the duplication of a still-running Pod, and if said Pod can still communicate with the other members of the StatefulSet, will violate the at most one semantics that StatefulSet is designed to guarantee." To mitigate this, it is safer to delete the node only after the owned infraMachine is gone. This gives a stronger guarantee that the underlying host has been destroyed, so there is no chance of a "leaked" stateful process still running. kubernetes-sigs#2565
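
In short, reconcileDelete now drains first, waits for the infrastructure, and deletes the node last. A condensed sketch of the resulting flow (error handling, logging, and retries elided; the complete change is in the diff below):

    isDeleteNodeAllowed, err := r.isDeleteNodeAllowed(ctx, m)
    if err != nil {
        // Known cases (e.g. errNilNodeRef) only skip node deletion;
        // unexpected errors abort the reconcile.
    }

    if isDeleteNodeAllowed {
        // 1. Drain the node while its host still exists.
    }

    // 2. Block until the owned infrastructure (infraMachine) is fully deleted.
    if ok, err := r.reconcileDeleteExternal(ctx, m); !ok || err != nil {
        return ctrl.Result{}, err
    }

    // 3. Only now delete the node: the host is guaranteed gone, so no leaked
    //    stateful process can collide with a replacement Pod's identity.
    if isDeleteNodeAllowed {
        // delete the node referenced by m.Status.NodeRef
    }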

File tree

2 files changed: +196 −15


controllers/machine_controller.go

+23 −15
@@ -261,7 +261,8 @@ func (r *MachineReconciler) reconcileDelete(ctx context.Context, cluster *cluste
     logger := r.Log.WithValues("machine", m.Name, "namespace", m.Namespace)
     logger = logger.WithValues("cluster", cluster.Name)
 
-    if err := r.isDeleteNodeAllowed(ctx, m); err != nil {
+    isDeleteNodeAllowed, err := r.isDeleteNodeAllowed(ctx, m)
+    if err != nil {
        switch err {
        case errNilNodeRef:
            logger.Error(err, "Deleting node is not allowed")
@@ -271,7 +272,9 @@ func (r *MachineReconciler) reconcileDelete(ctx context.Context, cluster *cluste
            logger.Error(err, "IsDeleteNodeAllowed check failed")
            return ctrl.Result{}, err
        }
-    } else {
+    }
+
+    if isDeleteNodeAllowed {
        // Drain node before deletion
        if _, exists := m.ObjectMeta.Annotations[clusterv1.ExcludeNodeDrainingAnnotation]; !exists {
            logger.Info("Draining node", "node", m.Status.NodeRef.Name)
@@ -281,6 +284,17 @@ func (r *MachineReconciler) reconcileDelete(ctx context.Context, cluster *cluste
            }
            r.recorder.Eventf(m, corev1.EventTypeNormal, "SuccessfulDrainNode", "success draining Machine's node %q", m.Status.NodeRef.Name)
        }
+    }
+
+    if ok, err := r.reconcileDeleteExternal(ctx, m); !ok || err != nil {
+        // Return early and don't remove the finalizer if we got an error or
+        // the external reconciliation deletion isn't ready.
+        return ctrl.Result{}, err
+    }
+
+    // We only delete the node after the underlying infrastructure is gone.
+    // https://github.com/kubernetes-sigs/cluster-api/issues/2565
+    if isDeleteNodeAllowed {
        logger.Info("Deleting node", "node", m.Status.NodeRef.Name)
 
        var deleteNodeErr error
@@ -296,28 +310,22 @@ func (r *MachineReconciler) reconcileDelete(ctx context.Context, cluster *cluste
        }
    }
 
-    if ok, err := r.reconcileDeleteExternal(ctx, m); !ok || err != nil {
-        // Return early and don't remove the finalizer if we got an error or
-        // the external reconciliation deletion isn't ready.
-        return ctrl.Result{}, err
-    }
-
    controllerutil.RemoveFinalizer(m, clusterv1.MachineFinalizer)
    return ctrl.Result{}, nil
 }
 
-// isDeleteNodeAllowed returns nil only if the Machine's NodeRef is not nil
+// isDeleteNodeAllowed returns true only if the Machine's NodeRef is not nil
 // and if the Machine is not the last control plane node in the cluster.
-func (r *MachineReconciler) isDeleteNodeAllowed(ctx context.Context, machine *clusterv1.Machine) error {
+func (r *MachineReconciler) isDeleteNodeAllowed(ctx context.Context, machine *clusterv1.Machine) (bool, error) {
    // Cannot delete something that doesn't exist.
    if machine.Status.NodeRef == nil {
-        return errNilNodeRef
+        return false, errNilNodeRef
    }
 
    // Get all of the machines that belong to this cluster.
    machines, err := getActiveMachinesInCluster(ctx, r.Client, machine.Namespace, machine.Labels[clusterv1.ClusterLabelName])
    if err != nil {
-        return err
+        return false, err
    }
 
    // Whether or not it is okay to delete the NodeRef depends on the
@@ -327,14 +335,14 @@ func (r *MachineReconciler) isDeleteNodeAllowed(ctx context.Context, machine *cl
    case numControlPlaneMachines == 0:
        // Do not delete the NodeRef if there are no remaining members of
        // the control plane.
-        return errNoControlPlaneNodes
+        return false, errNoControlPlaneNodes
    case numControlPlaneMachines == 1 && util.IsControlPlaneMachine(machine):
        // Do not delete the NodeRef if this is the last member of the
        // control plane.
-        return errLastControlPlaneNode
+        return false, errLastControlPlaneNode
    default:
        // Otherwise it is okay to delete the NodeRef.
-        return nil
+        return true, nil
    }
 }
controllers/machine_controller_test.go

+173
@@ -784,3 +784,176 @@ func Test_clusterToActiveMachines(t *testing.T) {
        g.Expect(got).To(Equal(tt.want))
    }
 }
+
+func TestIsDeleteNodeAllowed(t *testing.T) {
+    testCases := []struct {
+        name          string
+        machine       *clusterv1.Machine
+        expected      bool
+        expectedError error
+    }{
+        {
+            name: "machine without nodeRef",
+            machine: &clusterv1.Machine{
+                ObjectMeta: metav1.ObjectMeta{
+                    Name:       "created",
+                    Namespace:  "default",
+                    Finalizers: []string{clusterv1.MachineFinalizer, metav1.FinalizerDeleteDependents},
+                },
+                Spec: clusterv1.MachineSpec{
+                    ClusterName:       "test-cluster",
+                    InfrastructureRef: corev1.ObjectReference{},
+                    Bootstrap:         clusterv1.Bootstrap{Data: pointer.StringPtr("data")},
+                },
+                Status: clusterv1.MachineStatus{},
+            },
+            expected:      false,
+            expectedError: errNilNodeRef,
+        },
+        {
+            name: "no control plane members",
+            machine: &clusterv1.Machine{
+                ObjectMeta: metav1.ObjectMeta{
+                    Name:       "created",
+                    Namespace:  "default",
+                    Finalizers: []string{clusterv1.MachineFinalizer, metav1.FinalizerDeleteDependents},
+                },
+                Spec: clusterv1.MachineSpec{
+                    ClusterName:       "test-cluster",
+                    InfrastructureRef: corev1.ObjectReference{},
+                    Bootstrap:         clusterv1.Bootstrap{Data: pointer.StringPtr("data")},
+                },
+                Status: clusterv1.MachineStatus{
+                    NodeRef: &corev1.ObjectReference{
+                        Name: "test",
+                    },
+                },
+            },
+            expected:      false,
+            expectedError: errNoControlPlaneNodes,
+        },
+        {
+            name: "is last control plane member",
+            machine: &clusterv1.Machine{
+                ObjectMeta: metav1.ObjectMeta{
+                    Name:      "created",
+                    Namespace: "default",
+                    Labels: map[string]string{
+                        clusterv1.ClusterLabelName:             "test",
+                        clusterv1.MachineControlPlaneLabelName: "",
+                    },
+                    Finalizers: []string{clusterv1.MachineFinalizer, metav1.FinalizerDeleteDependents},
+                },
+                Spec: clusterv1.MachineSpec{
+                    ClusterName:       "test-cluster",
+                    InfrastructureRef: corev1.ObjectReference{},
+                    Bootstrap:         clusterv1.Bootstrap{Data: pointer.StringPtr("data")},
+                },
+                Status: clusterv1.MachineStatus{
+                    NodeRef: &corev1.ObjectReference{
+                        Name: "test",
+                    },
+                },
+            },
+            expected:      false,
+            expectedError: errLastControlPlaneNode,
+        },
+        {
+            name: "has nodeRef and control plane is healthy",
+            machine: &clusterv1.Machine{
+                ObjectMeta: metav1.ObjectMeta{
+                    Name:      "created",
+                    Namespace: "default",
+                    Labels: map[string]string{
+                        clusterv1.ClusterLabelName: "test",
+                    },
+                    Finalizers: []string{clusterv1.MachineFinalizer, metav1.FinalizerDeleteDependents},
+                },
+                Spec: clusterv1.MachineSpec{
+                    ClusterName:       "test-cluster",
+                    InfrastructureRef: corev1.ObjectReference{},
+                    Bootstrap:         clusterv1.Bootstrap{Data: pointer.StringPtr("data")},
+                },
+                Status: clusterv1.MachineStatus{
+                    NodeRef: &corev1.ObjectReference{
+                        Name: "test",
+                    },
+                },
+            },
+            expected:      true,
+            expectedError: nil,
+        },
+    }
+
+    for _, tc := range testCases {
+        t.Run(tc.name, func(t *testing.T) {
+            g := NewWithT(t)
+
+            m1 := &clusterv1.Machine{
+                ObjectMeta: metav1.ObjectMeta{
+                    Name:      "cp1",
+                    Namespace: "default",
+                    Labels: map[string]string{
+                        clusterv1.ClusterLabelName: "test",
+                    },
+                    Finalizers: []string{clusterv1.MachineFinalizer, metav1.FinalizerDeleteDependents},
+                },
+                Spec: clusterv1.MachineSpec{
+                    ClusterName:       "test-cluster",
+                    InfrastructureRef: corev1.ObjectReference{},
+                    Bootstrap:         clusterv1.Bootstrap{Data: pointer.StringPtr("data")},
+                },
+                Status: clusterv1.MachineStatus{
+                    NodeRef: &corev1.ObjectReference{
+                        Name: "test1",
+                    },
+                },
+            }
+            m2 := &clusterv1.Machine{
+                ObjectMeta: metav1.ObjectMeta{
+                    Name:      "cp2",
+                    Namespace: "default",
+                    Labels: map[string]string{
+                        clusterv1.ClusterLabelName: "test",
+                    },
+                    Finalizers: []string{clusterv1.MachineFinalizer, metav1.FinalizerDeleteDependents},
+                },
+                Spec: clusterv1.MachineSpec{
+                    ClusterName:       "test-cluster",
+                    InfrastructureRef: corev1.ObjectReference{},
+                    Bootstrap:         clusterv1.Bootstrap{Data: pointer.StringPtr("data")},
+                },
+                Status: clusterv1.MachineStatus{
+                    NodeRef: &corev1.ObjectReference{
+                        Name: "test2",
+                    },
+                },
+            }
+            // For isDeleteNodeAllowed to return true we assume a healthy control plane.
+            if tc.expected {
+                m1.Labels[clusterv1.MachineControlPlaneLabelName] = ""
+                m2.Labels[clusterv1.MachineControlPlaneLabelName] = ""
+            }
+
+            mr := &MachineReconciler{
+                Client: fake.NewFakeClientWithScheme(
+                    scheme.Scheme,
+                    tc.machine,
+                    m1,
+                    m2,
+                ),
+                Log:    log.Log,
+                scheme: scheme.Scheme,
+            }
+
+            isDeleteNodeAllowed, err := mr.isDeleteNodeAllowed(context.TODO(), tc.machine)
+            g.Expect(isDeleteNodeAllowed).To(Equal(tc.expected))
+            if tc.expectedError == nil {
+                g.Expect(err).To(BeNil())
+            } else {
+                g.Expect(err).To(Equal(tc.expectedError))
+            }
+        })
+    }
+}
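
The new cases can be exercised in isolation with the standard Go tooling, e.g. go test ./controllers -run TestIsDeleteNodeAllowed (assuming the repository's usual module layout).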
