Skip to content

Commit 42fae34

Browse files
committed
Issue-10544 ignore unreachable cluster while deleting machinePool
Signed-off-by: melserngawy <[email protected]>
1 parent e1a701f commit 42fae34

File tree

5 files changed

+162
-22
lines changed

5 files changed

+162
-22
lines changed

exp/internal/controllers/machinepool_controller.go

+39-16
Original file line numberDiff line numberDiff line change
@@ -253,41 +253,64 @@ func (r *MachinePoolReconciler) reconcile(ctx context.Context, cluster *clusterv
253253
return res, kerrors.NewAggregate(errs)
254254
}
255255

256-
func (r *MachinePoolReconciler) reconcileDelete(ctx context.Context, cluster *clusterv1.Cluster, mp *expv1.MachinePool) error {
257-
if ok, err := r.reconcileDeleteExternal(ctx, mp); !ok || err != nil {
256+
// reconcileDelete delete machinePool related resources.
257+
func (r *MachinePoolReconciler) reconcileDelete(ctx context.Context, cluster *clusterv1.Cluster, machinePool *expv1.MachinePool) error {
258+
if ok, err := r.reconcileDeleteExternal(ctx, machinePool); !ok || err != nil {
258259
// Return early and don't remove the finalizer if we got an error or
259260
// the external reconciliation deletion isn't ready.
260261
return err
261262
}
262263

263-
if err := r.reconcileDeleteNodes(ctx, cluster, mp); err != nil {
264-
// Return early and don't remove the finalizer if we got an error.
265-
return err
264+
// check nodes delete timeout passed.
265+
if !r.isMachinePoolNodeDeleteTimeoutPassed(machinePool) {
266+
if err := r.reconcileDeleteNodes(ctx, cluster, machinePool); err != nil {
267+
// Return early and don't remove the finalizer if we got an error.
268+
return err
269+
}
266270
}
267271

268-
controllerutil.RemoveFinalizer(mp, expv1.MachinePoolFinalizer)
272+
controllerutil.RemoveFinalizer(machinePool, expv1.MachinePoolFinalizer)
269273
return nil
270274
}
271275

272-
func (r *MachinePoolReconciler) reconcileDeleteNodes(ctx context.Context, cluster *clusterv1.Cluster, machinepool *expv1.MachinePool) error {
273-
if len(machinepool.Status.NodeRefs) == 0 {
276+
// reconcileDeleteNodes delete the cluster nodes.
277+
func (r *MachinePoolReconciler) reconcileDeleteNodes(ctx context.Context, cluster *clusterv1.Cluster, machinePool *expv1.MachinePool) error {
278+
if len(machinePool.Status.NodeRefs) == 0 {
274279
return nil
275280
}
276281

282+
if r.Tracker == nil {
283+
return errors.New("Cannot establish cluster client to delete nodes")
284+
}
285+
277286
clusterClient, err := r.Tracker.GetClient(ctx, util.ObjectKey(cluster))
278287
if err != nil {
279288
return err
280289
}
281290

282-
return r.deleteRetiredNodes(ctx, clusterClient, machinepool.Status.NodeRefs, machinepool.Spec.ProviderIDList)
291+
return r.deleteRetiredNodes(ctx, clusterClient, machinePool.Status.NodeRefs, machinePool.Spec.ProviderIDList)
292+
}
293+
294+
// isMachinePoolDeleteTimeoutPassed check the machinePool node delete time out.
295+
func (r *MachinePoolReconciler) isMachinePoolNodeDeleteTimeoutPassed(machinePool *expv1.MachinePool) bool {
296+
if !machinePool.DeletionTimestamp.IsZero() && machinePool.Spec.Template.Spec.NodeDeletionTimeout != nil {
297+
if machinePool.Spec.Template.Spec.NodeDeletionTimeout.Duration.Nanoseconds() != 0 {
298+
return machinePool.DeletionTimestamp.Add(machinePool.Spec.Template.Spec.NodeDeletionTimeout.Duration).After(time.Now())
299+
}
300+
}
301+
return false
283302
}
284303

285304
// reconcileDeleteExternal tries to delete external references, returning true if it cannot find any.
286-
func (r *MachinePoolReconciler) reconcileDeleteExternal(ctx context.Context, m *expv1.MachinePool) (bool, error) {
305+
func (r *MachinePoolReconciler) reconcileDeleteExternal(ctx context.Context, machinePool *expv1.MachinePool) (bool, error) {
287306
objects := []*unstructured.Unstructured{}
288-
references := []*corev1.ObjectReference{
289-
m.Spec.Template.Spec.Bootstrap.ConfigRef,
290-
&m.Spec.Template.Spec.InfrastructureRef,
307+
references := []*corev1.ObjectReference{}
308+
// check for external ref
309+
if machinePool.Spec.Template.Spec.Bootstrap.ConfigRef != nil {
310+
references = append(references, machinePool.Spec.Template.Spec.Bootstrap.ConfigRef)
311+
}
312+
if machinePool.Spec.Template.Spec.InfrastructureRef != (corev1.ObjectReference{}) {
313+
references = append(references, &machinePool.Spec.Template.Spec.InfrastructureRef)
291314
}
292315

293316
// Loop over the references and try to retrieve it with the client.
@@ -296,10 +319,10 @@ func (r *MachinePoolReconciler) reconcileDeleteExternal(ctx context.Context, m *
296319
continue
297320
}
298321

299-
obj, err := external.Get(ctx, r.Client, ref, m.Namespace)
322+
obj, err := external.Get(ctx, r.Client, ref, machinePool.Namespace)
300323
if err != nil && !apierrors.IsNotFound(errors.Cause(err)) {
301324
return false, errors.Wrapf(err, "failed to get %s %q for MachinePool %q in namespace %q",
302-
ref.GroupVersionKind(), ref.Name, m.Name, m.Namespace)
325+
ref.GroupVersionKind(), ref.Name, machinePool.Name, machinePool.Namespace)
303326
}
304327
if obj != nil {
305328
objects = append(objects, obj)
@@ -311,7 +334,7 @@ func (r *MachinePoolReconciler) reconcileDeleteExternal(ctx context.Context, m *
311334
if err := r.Client.Delete(ctx, obj); err != nil && !apierrors.IsNotFound(err) {
312335
return false, errors.Wrapf(err,
313336
"failed to delete %v %q for MachinePool %q in namespace %q",
314-
obj.GroupVersionKind(), obj.GetName(), m.Name, m.Namespace)
337+
obj.GroupVersionKind(), obj.GetName(), machinePool.Name, machinePool.Namespace)
315338
}
316339
}
317340

exp/internal/controllers/machinepool_controller_test.go

+111-6
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,9 @@ limitations under the License.
1717
package controllers
1818

1919
import (
20+
"context"
2021
"testing"
22+
"time"
2123

2224
. "github.com/onsi/gomega"
2325
corev1 "k8s.io/api/core/v1"
@@ -31,6 +33,7 @@ import (
3133
"sigs.k8s.io/controller-runtime/pkg/reconcile"
3234

3335
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
36+
"sigs.k8s.io/cluster-api/controllers/remote"
3437
expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1"
3538
"sigs.k8s.io/cluster-api/internal/test/builder"
3639
"sigs.k8s.io/cluster-api/util"
@@ -277,8 +280,8 @@ func TestReconcileMachinePoolRequest(t *testing.T) {
277280
},
278281
}
279282

280-
time := metav1.Now()
281-
283+
timeNow := metav1.Now()
284+
timePlus1H := metav1.Time{Time: time.Now().Add(time.Hour * 1)}
282285
testCluster := clusterv1.Cluster{
283286
TypeMeta: metav1.TypeMeta{Kind: "Cluster", APIVersion: clusterv1.GroupVersion.String()},
284287
ObjectMeta: metav1.ObjectMeta{Namespace: metav1.NamespaceDefault, Name: "test-cluster"},
@@ -302,6 +305,7 @@ func TestReconcileMachinePoolRequest(t *testing.T) {
302305
testCases := []struct {
303306
machinePool expv1.MachinePool
304307
expected expected
308+
withTracker bool
305309
}{
306310
{
307311
machinePool: expv1.MachinePool{
@@ -316,7 +320,6 @@ func TestReconcileMachinePoolRequest(t *testing.T) {
316320
Replicas: ptr.To[int32](1),
317321
Template: clusterv1.MachineTemplateSpec{
318322
Spec: clusterv1.MachineSpec{
319-
320323
InfrastructureRef: corev1.ObjectReference{
321324
APIVersion: builder.InfrastructureGroupVersion.String(),
322325
Kind: builder.TestInfrastructureMachineTemplateKind,
@@ -339,6 +342,7 @@ func TestReconcileMachinePoolRequest(t *testing.T) {
339342
result: reconcile.Result{},
340343
err: false,
341344
},
345+
withTracker: true,
342346
},
343347
{
344348
machinePool: expv1.MachinePool{
@@ -375,6 +379,7 @@ func TestReconcileMachinePoolRequest(t *testing.T) {
375379
result: reconcile.Result{},
376380
err: false,
377381
},
382+
withTracker: true,
378383
},
379384
{
380385
machinePool: expv1.MachinePool{
@@ -385,7 +390,8 @@ func TestReconcileMachinePoolRequest(t *testing.T) {
385390
clusterv1.MachineControlPlaneLabel: "",
386391
},
387392
Finalizers: []string{expv1.MachinePoolFinalizer},
388-
DeletionTimestamp: &time,
393+
CreationTimestamp: timeNow,
394+
DeletionTimestamp: &timeNow,
389395
},
390396
Spec: expv1.MachinePoolSpec{
391397
ClusterName: "test-cluster",
@@ -397,7 +403,61 @@ func TestReconcileMachinePoolRequest(t *testing.T) {
397403
Kind: builder.TestInfrastructureMachineTemplateKind,
398404
Name: "infra-config1",
399405
},
400-
Bootstrap: clusterv1.Bootstrap{DataSecretName: ptr.To("data")},
406+
Bootstrap: clusterv1.Bootstrap{DataSecretName: ptr.To("data")},
407+
NodeDrainTimeout: &metav1.Duration{Duration: 10 * time.Second},
408+
},
409+
},
410+
ProviderIDList: []string{"aws:///us-test-2a/i-013ab00756982217f"},
411+
},
412+
Status: expv1.MachinePoolStatus{
413+
NodeRefs: []corev1.ObjectReference{
414+
{
415+
APIVersion: "v1",
416+
Kind: "Node",
417+
Name: "test-node",
418+
},
419+
},
420+
},
421+
},
422+
expected: expected{
423+
result: reconcile.Result{},
424+
err: false,
425+
},
426+
withTracker: true,
427+
},
428+
{
429+
machinePool: expv1.MachinePool{
430+
ObjectMeta: metav1.ObjectMeta{
431+
Name: "deleted-timeoutNode",
432+
Namespace: metav1.NamespaceDefault,
433+
Labels: map[string]string{
434+
clusterv1.MachineControlPlaneLabel: "",
435+
},
436+
Finalizers: []string{expv1.MachinePoolFinalizer},
437+
CreationTimestamp: timeNow,
438+
DeletionTimestamp: &timePlus1H,
439+
},
440+
Spec: expv1.MachinePoolSpec{
441+
ClusterName: "test-cluster",
442+
Replicas: ptr.To[int32](1),
443+
Template: clusterv1.MachineTemplateSpec{
444+
Spec: clusterv1.MachineSpec{
445+
InfrastructureRef: corev1.ObjectReference{
446+
APIVersion: builder.InfrastructureGroupVersion.String(),
447+
Kind: builder.TestInfrastructureMachineTemplateKind,
448+
Name: "infra-config1",
449+
},
450+
NodeDeletionTimeout: &metav1.Duration{Duration: -3 * time.Hour},
451+
},
452+
},
453+
ProviderIDList: []string{"aws:///us-test-2a/i-013ab00756982217f"},
454+
},
455+
Status: expv1.MachinePoolStatus{
456+
NodeRefs: []corev1.ObjectReference{
457+
{
458+
APIVersion: "v1",
459+
Kind: "Node",
460+
Name: "test-node",
401461
},
402462
},
403463
},
@@ -407,6 +467,43 @@ func TestReconcileMachinePoolRequest(t *testing.T) {
407467
err: false,
408468
},
409469
},
470+
{
471+
machinePool: expv1.MachinePool{
472+
ObjectMeta: metav1.ObjectMeta{
473+
Name: "try-delete-forever",
474+
Namespace: metav1.NamespaceDefault,
475+
Labels: map[string]string{
476+
clusterv1.MachineControlPlaneLabel: "",
477+
},
478+
Finalizers: []string{expv1.MachinePoolFinalizer},
479+
CreationTimestamp: timeNow,
480+
DeletionTimestamp: &timePlus1H,
481+
},
482+
Spec: expv1.MachinePoolSpec{
483+
ClusterName: "test-cluster",
484+
Replicas: ptr.To[int32](1),
485+
Template: clusterv1.MachineTemplateSpec{
486+
Spec: clusterv1.MachineSpec{
487+
NodeDeletionTimeout: &metav1.Duration{Duration: 0 * time.Second},
488+
},
489+
},
490+
ProviderIDList: []string{"aws:///us-test-2a/i-013ab00756982217f"},
491+
},
492+
Status: expv1.MachinePoolStatus{
493+
NodeRefs: []corev1.ObjectReference{
494+
{
495+
APIVersion: "v1",
496+
Kind: "Node",
497+
Name: "test-node",
498+
},
499+
},
500+
},
501+
},
502+
expected: expected{
503+
result: reconcile.Result{},
504+
err: true,
505+
},
506+
},
410507
}
411508

412509
for i := range testCases {
@@ -428,6 +525,11 @@ func TestReconcileMachinePoolRequest(t *testing.T) {
428525
APIReader: clientFake,
429526
}
430527

528+
// Set the tracker.
529+
if tc.withTracker {
530+
r.Tracker = remote.NewTestClusterCacheTracker(ctrl.LoggerFrom(context.TODO()), clientFake, clientFake, clientFake.Scheme(), client.ObjectKey{Name: testCluster.Name, Namespace: testCluster.Namespace})
531+
}
532+
431533
result, err := r.Reconcile(ctx, reconcile.Request{NamespacedName: util.ObjectKey(&tc.machinePool)})
432534
if tc.expected.err {
433535
g.Expect(err).To(HaveOccurred())
@@ -591,8 +693,10 @@ func TestRemoveMachinePoolFinalizerAfterDeleteReconcile(t *testing.T) {
591693
},
592694
}
593695
key := client.ObjectKey{Namespace: m.Namespace, Name: m.Name}
696+
clientFake := fake.NewClientBuilder().WithObjects(testCluster, m).WithStatusSubresource(&expv1.MachinePool{}).Build()
594697
mr := &MachinePoolReconciler{
595-
Client: fake.NewClientBuilder().WithObjects(testCluster, m).WithStatusSubresource(&expv1.MachinePool{}).Build(),
698+
Client: clientFake,
699+
Tracker: remote.NewTestClusterCacheTracker(ctrl.LoggerFrom(context.TODO()), clientFake, clientFake, clientFake.Scheme(), client.ObjectKey{Name: testCluster.Name, Namespace: testCluster.Namespace}),
596700
}
597701
_, err := mr.Reconcile(ctx, reconcile.Request{NamespacedName: key})
598702
g.Expect(err).ToNot(HaveOccurred())
@@ -864,6 +968,7 @@ func TestMachinePoolConditions(t *testing.T) {
864968
r := &MachinePoolReconciler{
865969
Client: clientFake,
866970
APIReader: clientFake,
971+
Tracker: remote.NewTestClusterCacheTracker(ctrl.LoggerFrom(context.TODO()), clientFake, clientFake, clientFake.Scheme(), client.ObjectKey{Name: testCluster.Name, Namespace: testCluster.Namespace}),
867972
}
868973

869974
_, err := r.Reconcile(ctx, reconcile.Request{NamespacedName: util.ObjectKey(machinePool)})

exp/internal/webhooks/machinepool.go

+9
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,12 @@ import (
2121
"fmt"
2222
"strconv"
2323
"strings"
24+
"time"
2425

2526
"github.com/pkg/errors"
2627
v1 "k8s.io/api/admission/v1"
2728
apierrors "k8s.io/apimachinery/pkg/api/errors"
29+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2830
"k8s.io/apimachinery/pkg/runtime"
2931
"k8s.io/apimachinery/pkg/util/validation/field"
3032
"k8s.io/utils/ptr"
@@ -38,6 +40,8 @@ import (
3840
"sigs.k8s.io/cluster-api/util/version"
3941
)
4042

43+
const defaultNodeDeletionTimeout = 10 * time.Second
44+
4145
func (webhook *MachinePool) SetupWebhookWithManager(mgr ctrl.Manager) error {
4246
if webhook.decoder == nil {
4347
webhook.decoder = admission.NewDecoder(mgr.GetScheme())
@@ -108,6 +112,11 @@ func (webhook *MachinePool) Default(ctx context.Context, obj runtime.Object) err
108112
m.Spec.Template.Spec.InfrastructureRef.Namespace = m.Namespace
109113
}
110114

115+
// Set the default value for the node deletion timeout.
116+
if m.Spec.Template.Spec.NodeDeletionTimeout == nil {
117+
m.Spec.Template.Spec.NodeDeletionTimeout = &metav1.Duration{Duration: defaultNodeDeletionTimeout}
118+
}
119+
111120
// tolerate version strings without a "v" prefix: prepend it if it's not there.
112121
if m.Spec.Template.Spec.Version != nil && !strings.HasPrefix(*m.Spec.Template.Spec.Version, "v") {
113122
normalizedVersion := "v" + *m.Spec.Template.Spec.Version

exp/internal/webhooks/machinepool_test.go

+1
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ func TestMachinePoolDefault(t *testing.T) {
6262
g.Expect(mp.Spec.Template.Spec.Bootstrap.ConfigRef.Namespace).To(Equal(mp.Namespace))
6363
g.Expect(mp.Spec.Template.Spec.InfrastructureRef.Namespace).To(Equal(mp.Namespace))
6464
g.Expect(mp.Spec.Template.Spec.Version).To(Equal(ptr.To("v1.20.0")))
65+
g.Expect(mp.Spec.Template.Spec.NodeDeletionTimeout).To(Equal(&metav1.Duration{Duration: defaultNodeDeletionTimeout}))
6566
}
6667

6768
func TestCalculateMachinePoolReplicas(t *testing.T) {

internal/controllers/topology/cluster/reconcile_state_test.go

+2
Original file line numberDiff line numberDiff line change
@@ -2661,6 +2661,8 @@ func TestReconcileMachinePools(t *testing.T) {
26612661
if gotMachinePool.Spec.Template.Spec.Bootstrap.ConfigRef != nil {
26622662
wantMachinePoolState.Object.Spec.Template.Spec.Bootstrap.ConfigRef.Name = gotMachinePool.Spec.Template.Spec.Bootstrap.ConfigRef.Name
26632663
}
2664+
// expect default value for the node deletion timeout.
2665+
wantMachinePoolState.Object.Spec.Template.Spec.NodeDeletionTimeout = &metav1.Duration{Duration: 10 * time.Second}
26642666

26652667
// Compare MachinePool.
26662668
// Note: We're intentionally only comparing Spec as otherwise we would have to account for

0 commit comments

Comments
 (0)