Skip to content

Commit f9372a1

Browse files
authored
🌱 machine: prevent error spamming for NodeOutdatedTaint if objects are not found (#11148)
* machine: add test coverage for shouldNodeHaveOutdatedTaint * machine: prevent error spamming for NodeOutdatedTaint if objects are not found * lint fix * review fixes * review fixes * review fix
1 parent d7a2335 commit f9372a1

File tree

2 files changed

+189
-22
lines changed

2 files changed

+189
-22
lines changed

‎internal/controllers/machine/machine_controller_noderef.go

+36-20
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import (
2323

2424
"github.com/pkg/errors"
2525
corev1 "k8s.io/api/core/v1"
26+
apierrors "k8s.io/apimachinery/pkg/api/errors"
2627
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2728
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
2829
"k8s.io/apimachinery/pkg/runtime/schema"
@@ -296,14 +297,20 @@ func (r *Reconciler) patchNode(ctx context.Context, remoteClient client.Client,
296297
hasTaintChanges := taints.RemoveNodeTaint(newNode, clusterv1.NodeUninitializedTaint)
297298

298299
// Set Taint to a node in an old MachineSet and unset Taint from a node in a new MachineSet
299-
isOutdated, err := shouldNodeHaveOutdatedTaint(ctx, r.Client, m)
300+
isOutdated, notFound, err := shouldNodeHaveOutdatedTaint(ctx, r.Client, m)
300301
if err != nil {
301302
return errors.Wrapf(err, "failed to check if Node %s is outdated", klog.KRef("", node.Name))
302303
}
303-
if isOutdated {
304-
hasTaintChanges = taints.EnsureNodeTaint(newNode, clusterv1.NodeOutdatedRevisionTaint) || hasTaintChanges
305-
} else {
306-
hasTaintChanges = taints.RemoveNodeTaint(newNode, clusterv1.NodeOutdatedRevisionTaint) || hasTaintChanges
304+
305+
// It is only possible to identify if we have to set or remove the NodeOutdatedRevisionTaint if shouldNodeHaveOutdatedTaint
306+
// found all relevant objects.
307+
// Example: when the MachineDeployment or Machineset can't be found due to a background deletion of objects.
308+
if !notFound {
309+
if isOutdated {
310+
hasTaintChanges = taints.EnsureNodeTaint(newNode, clusterv1.NodeOutdatedRevisionTaint) || hasTaintChanges
311+
} else {
312+
hasTaintChanges = taints.RemoveNodeTaint(newNode, clusterv1.NodeOutdatedRevisionTaint) || hasTaintChanges
313+
}
307314
}
308315

309316
if !hasAnnotationChanges && !hasLabelChanges && !hasTaintChanges {
@@ -313,52 +320,61 @@ func (r *Reconciler) patchNode(ctx context.Context, remoteClient client.Client,
313320
return remoteClient.Patch(ctx, newNode, client.StrategicMergeFrom(node))
314321
}
315322

316-
func shouldNodeHaveOutdatedTaint(ctx context.Context, c client.Client, m *clusterv1.Machine) (bool, error) {
323+
// shouldNodeHaveOutdatedTaint tries to compare the revision of the owning MachineSet to the MachineDeployment.
324+
// It returns notFound = true if the OwnerReference is not set or the APIServer returns NotFound for the MachineSet or MachineDeployment.
325+
// Note: This three cases could happen during background deletion of objects.
326+
func shouldNodeHaveOutdatedTaint(ctx context.Context, c client.Client, m *clusterv1.Machine) (outdated bool, notFound bool, err error) {
317327
if _, hasLabel := m.Labels[clusterv1.MachineDeploymentNameLabel]; !hasLabel {
318-
return false, nil
328+
return false, false, nil
319329
}
320330

321331
// Resolve the MachineSet name via owner references because the label value
322332
// could also be a hash.
323-
objKey, err := getOwnerMachineSetObjectKey(m.ObjectMeta)
324-
if err != nil {
325-
return false, err
333+
objKey, notFound, err := getOwnerMachineSetObjectKey(m.ObjectMeta)
334+
if err != nil || notFound {
335+
return false, notFound, err
326336
}
327337
ms := &clusterv1.MachineSet{}
328338
if err := c.Get(ctx, *objKey, ms); err != nil {
329-
return false, err
339+
if apierrors.IsNotFound(err) {
340+
return false, true, nil
341+
}
342+
return false, false, err
330343
}
331344
md := &clusterv1.MachineDeployment{}
332345
objKey = &client.ObjectKey{
333346
Namespace: m.ObjectMeta.Namespace,
334347
Name: m.Labels[clusterv1.MachineDeploymentNameLabel],
335348
}
336349
if err := c.Get(ctx, *objKey, md); err != nil {
337-
return false, err
350+
if apierrors.IsNotFound(err) {
351+
return false, true, nil
352+
}
353+
return false, false, err
338354
}
339355
msRev, err := mdutil.Revision(ms)
340356
if err != nil {
341-
return false, err
357+
return false, false, err
342358
}
343359
mdRev, err := mdutil.Revision(md)
344360
if err != nil {
345-
return false, err
361+
return false, false, err
346362
}
347363
if msRev < mdRev {
348-
return true, nil
364+
return true, false, nil
349365
}
350-
return false, nil
366+
return false, false, nil
351367
}
352368

353-
func getOwnerMachineSetObjectKey(obj metav1.ObjectMeta) (*client.ObjectKey, error) {
369+
func getOwnerMachineSetObjectKey(obj metav1.ObjectMeta) (*client.ObjectKey, bool, error) {
354370
for _, ref := range obj.GetOwnerReferences() {
355371
gv, err := schema.ParseGroupVersion(ref.APIVersion)
356372
if err != nil {
357-
return nil, err
373+
return nil, false, err
358374
}
359375
if ref.Kind == "MachineSet" && gv.Group == clusterv1.GroupVersion.Group {
360-
return &client.ObjectKey{Namespace: obj.Namespace, Name: ref.Name}, nil
376+
return &client.ObjectKey{Namespace: obj.Namespace, Name: ref.Name}, false, nil
361377
}
362378
}
363-
return nil, errors.Errorf("failed to find MachineSet owner reference for Machine %s", klog.KRef(obj.GetNamespace(), obj.GetName()))
379+
return nil, true, nil
364380
}

‎internal/controllers/machine/machine_controller_noderef_test.go

+153-2
Original file line numberDiff line numberDiff line change
@@ -29,11 +29,14 @@ import (
2929
"k8s.io/utils/ptr"
3030
ctrl "sigs.k8s.io/controller-runtime"
3131
"sigs.k8s.io/controller-runtime/pkg/client"
32+
"sigs.k8s.io/controller-runtime/pkg/client/fake"
3233
"sigs.k8s.io/controller-runtime/pkg/handler"
3334
"sigs.k8s.io/controller-runtime/pkg/reconcile"
3435

3536
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
3637
"sigs.k8s.io/cluster-api/controllers/remote"
38+
"sigs.k8s.io/cluster-api/internal/test/builder"
39+
"sigs.k8s.io/cluster-api/internal/topology/ownerrefs"
3740
"sigs.k8s.io/cluster-api/util"
3841
"sigs.k8s.io/cluster-api/util/kubeconfig"
3942
)
@@ -770,6 +773,49 @@ func TestPatchNode(t *testing.T) {
770773
ms: newFakeMachineSet(metav1.NamespaceDefault, clusterName),
771774
md: newFakeMachineDeployment(metav1.NamespaceDefault, clusterName),
772775
},
776+
{
777+
name: "Ensure Labels and Annotations still get patched if MachineSet and Machinedeployment cannot be found",
778+
oldNode: &corev1.Node{
779+
ObjectMeta: metav1.ObjectMeta{
780+
Name: fmt.Sprintf("node-%s", util.RandomString(6)),
781+
},
782+
},
783+
newLabels: map[string]string{
784+
"label-from-machine": "foo",
785+
},
786+
newAnnotations: map[string]string{
787+
"annotation-from-machine": "foo",
788+
},
789+
expectedLabels: map[string]string{
790+
"label-from-machine": "foo",
791+
},
792+
expectedAnnotations: map[string]string{
793+
"annotation-from-machine": "foo",
794+
clusterv1.LabelsFromMachineAnnotation: "label-from-machine",
795+
},
796+
expectedTaints: []corev1.Taint{
797+
{Key: "node.kubernetes.io/not-ready", Effect: "NoSchedule"}, // Added by the API server
798+
},
799+
machine: &clusterv1.Machine{
800+
ObjectMeta: metav1.ObjectMeta{
801+
Name: fmt.Sprintf("ma-%s", util.RandomString(6)),
802+
Namespace: metav1.NamespaceDefault,
803+
Labels: map[string]string{
804+
clusterv1.MachineSetNameLabel: "test-ms-missing",
805+
clusterv1.MachineDeploymentNameLabel: "test-md",
806+
},
807+
OwnerReferences: []metav1.OwnerReference{{
808+
Kind: "MachineSet",
809+
Name: "test-ms-missing",
810+
APIVersion: clusterv1.GroupVersion.String(),
811+
UID: "uid",
812+
}},
813+
},
814+
Spec: newFakeMachineSpec(metav1.NamespaceDefault, clusterName),
815+
},
816+
ms: nil,
817+
md: nil,
818+
},
773819
{
774820
name: "Ensure NodeOutdatedRevisionTaint to be set if a node is associated to an outdated machineset",
775821
oldNode: &corev1.Node{
@@ -913,8 +959,12 @@ func TestPatchNode(t *testing.T) {
913959

914960
g.Expect(env.CreateAndWait(ctx, oldNode)).To(Succeed())
915961
g.Expect(env.CreateAndWait(ctx, machine)).To(Succeed())
916-
g.Expect(env.CreateAndWait(ctx, ms)).To(Succeed())
917-
g.Expect(env.CreateAndWait(ctx, md)).To(Succeed())
962+
if ms != nil {
963+
g.Expect(env.CreateAndWait(ctx, ms)).To(Succeed())
964+
}
965+
if md != nil {
966+
g.Expect(env.CreateAndWait(ctx, md)).To(Succeed())
967+
}
918968
t.Cleanup(func() {
919969
_ = env.CleanupAndWait(ctx, oldNode, machine, ms, md)
920970
})
@@ -994,3 +1044,104 @@ func newFakeMachineDeployment(namespace, clusterName string) *clusterv1.MachineD
9941044
},
9951045
}
9961046
}
1047+
1048+
func Test_shouldNodeHaveOutdatedTaint(t *testing.T) {
1049+
namespaceName := "test"
1050+
namespace := &corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: namespaceName}}
1051+
1052+
testMachineDeployment := builder.MachineDeployment(namespaceName, "my-md").
1053+
WithAnnotations(map[string]string{clusterv1.RevisionAnnotation: "1"}).
1054+
Build()
1055+
testMachineDeploymentNew := testMachineDeployment.DeepCopy()
1056+
testMachineDeploymentNew.Annotations = map[string]string{clusterv1.RevisionAnnotation: "2"}
1057+
1058+
testMachineSet := builder.MachineSet(namespaceName, "my-ms").
1059+
WithOwnerReferences([]metav1.OwnerReference{*ownerrefs.OwnerReferenceTo(testMachineDeployment, testMachineDeployment.GroupVersionKind())}).
1060+
Build()
1061+
testMachineSet.Annotations = map[string]string{clusterv1.RevisionAnnotation: "1"}
1062+
1063+
labels := map[string]string{
1064+
clusterv1.MachineDeploymentNameLabel: "my-md",
1065+
}
1066+
testMachine := builder.Machine(namespaceName, "my-machine").WithLabels(labels).Build()
1067+
testMachine.SetOwnerReferences([]metav1.OwnerReference{*ownerrefs.OwnerReferenceTo(testMachineSet, testMachineSet.GroupVersionKind())})
1068+
1069+
tests := []struct {
1070+
name string
1071+
machine *clusterv1.Machine
1072+
objects []client.Object
1073+
wantOutdated bool
1074+
wantNotFound bool
1075+
wantErr bool
1076+
}{
1077+
{
1078+
name: "Machineset not outdated",
1079+
machine: testMachine,
1080+
objects: []client.Object{testMachineSet, testMachineDeployment},
1081+
wantOutdated: false,
1082+
wantNotFound: false,
1083+
wantErr: false,
1084+
},
1085+
{
1086+
name: "Machineset outdated",
1087+
machine: testMachine,
1088+
objects: []client.Object{testMachineSet, testMachineDeploymentNew},
1089+
wantOutdated: true,
1090+
wantNotFound: false,
1091+
wantErr: false,
1092+
},
1093+
{
1094+
name: "Machine without MachineDeployment label",
1095+
machine: builder.Machine(namespaceName, "no-deploy").Build(),
1096+
objects: nil,
1097+
wantOutdated: false,
1098+
wantNotFound: false,
1099+
wantErr: false,
1100+
},
1101+
{
1102+
name: "Machine without OwnerReference",
1103+
machine: builder.Machine(namespaceName, "no-ownerref").WithLabels(labels).Build(),
1104+
objects: nil,
1105+
wantOutdated: false,
1106+
wantNotFound: true,
1107+
wantErr: false,
1108+
},
1109+
{
1110+
name: "Machine without existing MachineSet",
1111+
machine: testMachine,
1112+
objects: nil,
1113+
wantOutdated: false,
1114+
wantNotFound: true,
1115+
wantErr: false,
1116+
},
1117+
{
1118+
name: "Machine without existing MachineDeployment",
1119+
machine: testMachine,
1120+
objects: []client.Object{testMachineSet},
1121+
wantOutdated: false,
1122+
wantNotFound: true,
1123+
wantErr: false,
1124+
},
1125+
}
1126+
for _, tt := range tests {
1127+
t.Run(tt.name, func(t *testing.T) {
1128+
objects := []client.Object{namespace}
1129+
objects = append(objects, tt.machine)
1130+
objects = append(objects, tt.objects...)
1131+
c := fake.NewClientBuilder().
1132+
WithObjects(objects...).Build()
1133+
1134+
gotOutdated, gotNotFound, err := shouldNodeHaveOutdatedTaint(ctx, c, tt.machine)
1135+
if (err != nil) != tt.wantErr {
1136+
t.Errorf("shouldNodeHaveOutdatedTaint() error = %v, wantErr %v", err, tt.wantErr)
1137+
return
1138+
}
1139+
if gotOutdated != tt.wantOutdated {
1140+
t.Errorf("shouldNodeHaveOutdatedTaint() = %v, want %v", gotOutdated, tt.wantOutdated)
1141+
}
1142+
if gotNotFound != tt.wantNotFound {
1143+
t.Errorf("shouldNodeHaveOutdatedTaint() = %v, want %v", gotNotFound, tt.wantNotFound)
1144+
}
1145+
})
1146+
}
1147+
}

0 commit comments

Comments
 (0)