Skip to content

Commit 4c1687a

Browse files
committed
✨ Refactor MS controller status patching
Signed-off-by: Vince Prignano <[email protected]>
1 parent 83c7edf commit 4c1687a

File tree

2 files changed

+119
-166
lines changed

2 files changed

+119
-166
lines changed

controllers/machineset_controller.go

Lines changed: 119 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,8 @@ import (
3434
"k8s.io/client-go/tools/record"
3535
clusterv1 "sigs.k8s.io/cluster-api/api/v1alpha3"
3636
"sigs.k8s.io/cluster-api/controllers/external"
37+
"sigs.k8s.io/cluster-api/controllers/noderefutil"
38+
"sigs.k8s.io/cluster-api/controllers/remote"
3739
"sigs.k8s.io/cluster-api/util"
3840
"sigs.k8s.io/cluster-api/util/patch"
3941
ctrl "sigs.k8s.io/controller-runtime"
@@ -92,7 +94,6 @@ func (r *MachineSetReconciler) SetupWithManager(mgr ctrl.Manager, options contro
9294
}
9395

9496
r.recorder = mgr.GetEventRecorderFor("machineset-controller")
95-
9697
r.scheme = mgr.GetScheme()
9798
return nil
9899
}
@@ -179,14 +180,6 @@ func (r *MachineSetReconciler) reconcile(ctx context.Context, machineSet *cluste
179180
machineSet.Spec.Selector.MatchLabels[clusterv1.MachineSetLabelName] = machineSet.Name
180181
machineSet.Spec.Template.Labels[clusterv1.MachineSetLabelName] = machineSet.Name
181182

182-
selector, err := metav1.LabelSelectorAsSelector(&machineSet.Spec.Selector)
183-
if err != nil {
184-
return ctrl.Result{}, errors.Wrapf(err, "failed to parse MachineSet %q label selector", machineSet.Name)
185-
}
186-
// Copy label selector to its status counterpart in string format.
187-
// This is necessary for CRDs including scale subresources.
188-
machineSet.Status.Selector = selector.String()
189-
190183
selectorMap, err := metav1.LabelSelectorAsMap(&machineSet.Spec.Selector)
191184
if err != nil {
192185
return ctrl.Result{}, errors.Wrapf(err, "failed to convert MachineSet %q label selector to a map", machineSet.Name)
@@ -228,19 +221,19 @@ func (r *MachineSetReconciler) reconcile(ctx context.Context, machineSet *cluste
228221
syncErr := r.syncReplicas(ctx, machineSet, filteredMachines)
229222

230223
ms := machineSet.DeepCopy()
231-
newStatus := r.calculateStatus(ms, filteredMachines)
224+
newStatus := r.calculateStatus(cluster, ms, filteredMachines)
232225

233226
// Always updates status as machines come up or die.
234-
updatedMS, err := updateMachineSetStatus(r.Client, machineSet, newStatus, logger)
227+
updatedMS, err := r.patchMachineSetStatus(ctx, machineSet, newStatus)
235228
if err != nil {
236229
if syncErr != nil {
237-
return ctrl.Result{}, errors.Wrapf(err, "failed to sync machines: %v. failed to update machine set status", syncErr)
230+
return ctrl.Result{}, errors.Wrapf(err, "failed to sync machines: %v. failed to patch MachineSet's Status", syncErr)
238231
}
239-
return ctrl.Result{}, errors.Wrap(err, "failed to update machine set status")
232+
return ctrl.Result{}, errors.Wrap(err, "failed to patch MachineSet's Status")
240233
}
241234

242235
if syncErr != nil {
243-
return ctrl.Result{}, errors.Wrapf(syncErr, "failed to sync Machineset replicas")
236+
return ctrl.Result{}, errors.Wrapf(syncErr, "failed to sync MachineSet replicas")
244237
}
245238

246239
var replicas int32
@@ -595,3 +588,115 @@ func (r *MachineSetReconciler) hasMatchingLabels(machineSet *clusterv1.MachineSe
595588
func (r *MachineSetReconciler) shouldAdopt(ms *clusterv1.MachineSet) bool {
596589
return !util.HasOwner(ms.OwnerReferences, clusterv1.GroupVersion.String(), []string{"MachineDeployment", "Cluster"})
597590
}
591+
592+
func (r *MachineSetReconciler) calculateStatus(cluster *clusterv1.Cluster, ms *clusterv1.MachineSet, filteredMachines []*clusterv1.Machine) clusterv1.MachineSetStatus {
593+
logger := r.Log.WithValues("machineset", ms.Name, "namespace", ms.Namespace)
594+
newStatus := ms.Status
595+
596+
// Copy label selector to its status counterpart in string format.
597+
// This is necessary for CRDs including scale subresources.
598+
selector, _ := metav1.LabelSelectorAsSelector(&ms.Spec.Selector)
599+
newStatus.Selector = selector.String()
600+
601+
// Count the number of machines that have labels matching the labels of the machine
602+
// template of the replica set, the matching machines may have more
603+
// labels than are in the template. Because the label of machineTemplateSpec is
604+
// a superset of the selector of the replica set, so the possible
605+
// matching machines must be part of the filteredMachines.
606+
fullyLabeledReplicasCount := 0
607+
readyReplicasCount := 0
608+
availableReplicasCount := 0
609+
templateLabel := labels.Set(ms.Spec.Template.Labels).AsSelectorPreValidated()
610+
611+
for _, machine := range filteredMachines {
612+
if templateLabel.Matches(labels.Set(machine.Labels)) {
613+
fullyLabeledReplicasCount++
614+
}
615+
616+
if machine.Status.NodeRef == nil {
617+
logger.V(2).Info("Unable to retrieve Node status, missing NodeRef", "machine", machine.Name)
618+
continue
619+
}
620+
621+
node, err := r.getMachineNode(cluster, machine)
622+
if err != nil {
623+
logger.Error(err, "Unable to retrieve Node status")
624+
continue
625+
}
626+
627+
if noderefutil.IsNodeReady(node) {
628+
readyReplicasCount++
629+
if noderefutil.IsNodeAvailable(node, ms.Spec.MinReadySeconds, metav1.Now()) {
630+
availableReplicasCount++
631+
}
632+
}
633+
}
634+
635+
newStatus.Replicas = int32(len(filteredMachines))
636+
newStatus.FullyLabeledReplicas = int32(fullyLabeledReplicasCount)
637+
newStatus.ReadyReplicas = int32(readyReplicasCount)
638+
newStatus.AvailableReplicas = int32(availableReplicasCount)
639+
return newStatus
640+
}
641+
642+
// patchMachineSetStatus attempts to update the Status.Replicas of the given MachineSet.
643+
func (r *MachineSetReconciler) patchMachineSetStatus(ctx context.Context, ms *clusterv1.MachineSet, newStatus clusterv1.MachineSetStatus) (*clusterv1.MachineSet, error) {
644+
logger := r.Log.WithValues("machineset", ms.Name, "namespace", ms.Namespace)
645+
646+
// This is the steady state. It happens when the MachineSet doesn't have any expectations, since
647+
// we do a periodic relist every 10 minutes. If the generations differ but the replicas are
648+
// the same, a caller might've resized to the same replica count.
649+
if ms.Status.Replicas == newStatus.Replicas &&
650+
ms.Status.FullyLabeledReplicas == newStatus.FullyLabeledReplicas &&
651+
ms.Status.ReadyReplicas == newStatus.ReadyReplicas &&
652+
ms.Status.AvailableReplicas == newStatus.AvailableReplicas &&
653+
ms.Generation == ms.Status.ObservedGeneration {
654+
return ms, nil
655+
}
656+
657+
patch := client.MergeFrom(ms.DeepCopyObject())
658+
659+
// Save the generation number we acted on, otherwise we might wrongfully indicate
660+
// that we've seen a spec update when we retry.
661+
newStatus.ObservedGeneration = ms.Generation
662+
663+
// Calculate the replicas for logging.
664+
var replicas int32
665+
if ms.Spec.Replicas != nil {
666+
replicas = *ms.Spec.Replicas
667+
}
668+
logger.V(4).Info(fmt.Sprintf("Updating status for %v: %s/%s, ", ms.Kind, ms.Namespace, ms.Name) +
669+
fmt.Sprintf("replicas %d->%d (need %d), ", ms.Status.Replicas, newStatus.Replicas, replicas) +
670+
fmt.Sprintf("fullyLabeledReplicas %d->%d, ", ms.Status.FullyLabeledReplicas, newStatus.FullyLabeledReplicas) +
671+
fmt.Sprintf("readyReplicas %d->%d, ", ms.Status.ReadyReplicas, newStatus.ReadyReplicas) +
672+
fmt.Sprintf("availableReplicas %d->%d, ", ms.Status.AvailableReplicas, newStatus.AvailableReplicas) +
673+
fmt.Sprintf("sequence No: %v->%v", ms.Status.ObservedGeneration, newStatus.ObservedGeneration))
674+
675+
ms.Status = newStatus
676+
if err := r.Client.Status().Patch(ctx, ms, patch); err != nil {
677+
// TODO(vincepri): Try to fix this once we upgrade to CRDv1.
678+
// Our Status.Replicas field is a required non-pointer integer, Go defaults this field to "0" value when decoding
679+
// the data from the API server. For this reason, when we try to write the value "0", the patch is going to think
680+
// the value is already there and shouldn't be patched, making it fail validation.
681+
// Fallback to Update.
682+
if !apierrors.IsInvalid(err) {
683+
return nil, err
684+
}
685+
if err := r.Client.Status().Update(ctx, ms); err != nil {
686+
return nil, err
687+
}
688+
}
689+
return ms, nil
690+
}
691+
692+
func (r *MachineSetReconciler) getMachineNode(cluster *clusterv1.Cluster, machine *clusterv1.Machine) (*corev1.Node, error) {
693+
c, err := remote.NewClusterClient(r.Client, cluster, r.scheme)
694+
if err != nil {
695+
return nil, err
696+
}
697+
node := &corev1.Node{}
698+
if err := c.Get(context.TODO(), client.ObjectKey{Name: machine.Status.NodeRef.Name}, node); err != nil {
699+
return nil, errors.Wrapf(err, "error retrieving node %s for machine %s/%s", machine.Status.NodeRef.Name, machine.Namespace, machine.Name)
700+
}
701+
return node, nil
702+
}

controllers/machineset_status.go

Lines changed: 0 additions & 152 deletions
This file was deleted.

0 commit comments

Comments
 (0)