Skip to content

Commit c246f9b

Browse files
authored
Merge pull request #1944 from vincepri/ms-controller-patch
✨ Refactor MS controller status patching
2 parents b3526e1 + a6db0cc commit c246f9b

File tree

2 files changed

+125
-166
lines changed

2 files changed

+125
-166
lines changed

controllers/machineset_controller.go

Lines changed: 125 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,8 @@ import (
3434
"k8s.io/client-go/tools/record"
3535
clusterv1 "sigs.k8s.io/cluster-api/api/v1alpha3"
3636
"sigs.k8s.io/cluster-api/controllers/external"
37+
"sigs.k8s.io/cluster-api/controllers/noderefutil"
38+
"sigs.k8s.io/cluster-api/controllers/remote"
3739
"sigs.k8s.io/cluster-api/util"
3840
"sigs.k8s.io/cluster-api/util/patch"
3941
ctrl "sigs.k8s.io/controller-runtime"
@@ -92,7 +94,6 @@ func (r *MachineSetReconciler) SetupWithManager(mgr ctrl.Manager, options contro
9294
}
9395

9496
r.recorder = mgr.GetEventRecorderFor("machineset-controller")
95-
9697
r.scheme = mgr.GetScheme()
9798
return nil
9899
}
@@ -179,14 +180,6 @@ func (r *MachineSetReconciler) reconcile(ctx context.Context, machineSet *cluste
179180
machineSet.Spec.Selector.MatchLabels[clusterv1.MachineSetLabelName] = machineSet.Name
180181
machineSet.Spec.Template.Labels[clusterv1.MachineSetLabelName] = machineSet.Name
181182

182-
selector, err := metav1.LabelSelectorAsSelector(&machineSet.Spec.Selector)
183-
if err != nil {
184-
return ctrl.Result{}, errors.Wrapf(err, "failed to parse MachineSet %q label selector", machineSet.Name)
185-
}
186-
// Copy label selector to its status counterpart in string format.
187-
// This is necessary for CRDs including scale subresources.
188-
machineSet.Status.Selector = selector.String()
189-
190183
selectorMap, err := metav1.LabelSelectorAsMap(&machineSet.Spec.Selector)
191184
if err != nil {
192185
return ctrl.Result{}, errors.Wrapf(err, "failed to convert MachineSet %q label selector to a map", machineSet.Name)
@@ -228,19 +221,22 @@ func (r *MachineSetReconciler) reconcile(ctx context.Context, machineSet *cluste
228221
syncErr := r.syncReplicas(ctx, machineSet, filteredMachines)
229222

230223
ms := machineSet.DeepCopy()
231-
newStatus := r.calculateStatus(ms, filteredMachines)
224+
newStatus, err := r.calculateStatus(cluster, ms, filteredMachines)
225+
if err != nil {
226+
return ctrl.Result{}, errors.Wrapf(err, "failed to calculate MachineSet's Status")
227+
}
232228

233229
// Always updates status as machines come up or die.
234-
updatedMS, err := updateMachineSetStatus(r.Client, machineSet, newStatus, logger)
230+
updatedMS, err := r.patchMachineSetStatus(ctx, machineSet, newStatus)
235231
if err != nil {
236232
if syncErr != nil {
237-
return ctrl.Result{}, errors.Wrapf(err, "failed to sync machines: %v. failed to update machine set status", syncErr)
233+
return ctrl.Result{}, errors.Wrapf(err, "failed to sync machines: %v. failed to patch MachineSet's Status", syncErr)
238234
}
239-
return ctrl.Result{}, errors.Wrap(err, "failed to update machine set status")
235+
return ctrl.Result{}, errors.Wrap(err, "failed to patch MachineSet's Status")
240236
}
241237

242238
if syncErr != nil {
243-
return ctrl.Result{}, errors.Wrapf(syncErr, "failed to sync Machineset replicas")
239+
return ctrl.Result{}, errors.Wrapf(syncErr, "failed to sync MachineSet replicas")
244240
}
245241

246242
var replicas int32
@@ -595,3 +591,118 @@ func (r *MachineSetReconciler) hasMatchingLabels(machineSet *clusterv1.MachineSe
595591
func (r *MachineSetReconciler) shouldAdopt(ms *clusterv1.MachineSet) bool {
596592
return !util.HasOwner(ms.OwnerReferences, clusterv1.GroupVersion.String(), []string{"MachineDeployment", "Cluster"})
597593
}
594+
595+
func (r *MachineSetReconciler) calculateStatus(cluster *clusterv1.Cluster, ms *clusterv1.MachineSet, filteredMachines []*clusterv1.Machine) (*clusterv1.MachineSetStatus, error) {
596+
logger := r.Log.WithValues("machineset", ms.Name, "namespace", ms.Namespace)
597+
newStatus := ms.Status.DeepCopy()
598+
599+
// Copy label selector to its status counterpart in string format.
600+
// This is necessary for CRDs including scale subresources.
601+
selector, err := metav1.LabelSelectorAsSelector(&ms.Spec.Selector)
602+
if err != nil {
603+
return nil, errors.Wrapf(err, "failed to calculate status for MachineSet %s/%s", ms.Namespace, ms.Name)
604+
}
605+
newStatus.Selector = selector.String()
606+
607+
// Count the number of machines that have labels matching the labels of the machine
608+
// template of the replica set, the matching machines may have more
609+
// labels than are in the template. Because the label of machineTemplateSpec is
610+
// a superset of the selector of the replica set, so the possible
611+
// matching machines must be part of the filteredMachines.
612+
fullyLabeledReplicasCount := 0
613+
readyReplicasCount := 0
614+
availableReplicasCount := 0
615+
templateLabel := labels.Set(ms.Spec.Template.Labels).AsSelectorPreValidated()
616+
617+
for _, machine := range filteredMachines {
618+
if templateLabel.Matches(labels.Set(machine.Labels)) {
619+
fullyLabeledReplicasCount++
620+
}
621+
622+
if machine.Status.NodeRef == nil {
623+
logger.V(2).Info("Unable to retrieve Node status, missing NodeRef", "machine", machine.Name)
624+
continue
625+
}
626+
627+
node, err := r.getMachineNode(cluster, machine)
628+
if err != nil {
629+
logger.Error(err, "Unable to retrieve Node status")
630+
continue
631+
}
632+
633+
if noderefutil.IsNodeReady(node) {
634+
readyReplicasCount++
635+
if noderefutil.IsNodeAvailable(node, ms.Spec.MinReadySeconds, metav1.Now()) {
636+
availableReplicasCount++
637+
}
638+
}
639+
}
640+
641+
newStatus.Replicas = int32(len(filteredMachines))
642+
newStatus.FullyLabeledReplicas = int32(fullyLabeledReplicasCount)
643+
newStatus.ReadyReplicas = int32(readyReplicasCount)
644+
newStatus.AvailableReplicas = int32(availableReplicasCount)
645+
return newStatus, nil
646+
}
647+
648+
// patchMachineSetStatus attempts to update the Status.Replicas of the given MachineSet.
649+
func (r *MachineSetReconciler) patchMachineSetStatus(ctx context.Context, ms *clusterv1.MachineSet, newStatus *clusterv1.MachineSetStatus) (*clusterv1.MachineSet, error) {
650+
logger := r.Log.WithValues("machineset", ms.Name, "namespace", ms.Namespace)
651+
652+
// This is the steady state. It happens when the MachineSet doesn't have any expectations, since
653+
// we do a periodic relist every 10 minutes. If the generations differ but the replicas are
654+
// the same, a caller might've resized to the same replica count.
655+
if ms.Status.Replicas == newStatus.Replicas &&
656+
ms.Status.FullyLabeledReplicas == newStatus.FullyLabeledReplicas &&
657+
ms.Status.ReadyReplicas == newStatus.ReadyReplicas &&
658+
ms.Status.AvailableReplicas == newStatus.AvailableReplicas &&
659+
ms.Generation == ms.Status.ObservedGeneration {
660+
return ms, nil
661+
}
662+
663+
patch := client.MergeFrom(ms.DeepCopyObject())
664+
665+
// Save the generation number we acted on, otherwise we might wrongfully indicate
666+
// that we've seen a spec update when we retry.
667+
newStatus.ObservedGeneration = ms.Generation
668+
669+
// Calculate the replicas for logging.
670+
var replicas int32
671+
if ms.Spec.Replicas != nil {
672+
replicas = *ms.Spec.Replicas
673+
}
674+
logger.V(4).Info(fmt.Sprintf("Updating status for %v: %s/%s, ", ms.Kind, ms.Namespace, ms.Name) +
675+
fmt.Sprintf("replicas %d->%d (need %d), ", ms.Status.Replicas, newStatus.Replicas, replicas) +
676+
fmt.Sprintf("fullyLabeledReplicas %d->%d, ", ms.Status.FullyLabeledReplicas, newStatus.FullyLabeledReplicas) +
677+
fmt.Sprintf("readyReplicas %d->%d, ", ms.Status.ReadyReplicas, newStatus.ReadyReplicas) +
678+
fmt.Sprintf("availableReplicas %d->%d, ", ms.Status.AvailableReplicas, newStatus.AvailableReplicas) +
679+
fmt.Sprintf("sequence No: %v->%v", ms.Status.ObservedGeneration, newStatus.ObservedGeneration))
680+
681+
newStatus.DeepCopyInto(&ms.Status)
682+
if err := r.Client.Status().Patch(ctx, ms, patch); err != nil {
683+
// TODO(vincepri): Try to fix this once we upgrade to CRDv1.
684+
// Our Status.Replicas field is a required non-pointer integer, Go defaults this field to "0" value when decoding
685+
// the data from the API server. For this reason, when we try to write the value "0", the patch is going to think
686+
// the value is already there and shouldn't be patched, making it fail validation.
687+
// Fallback to Update.
688+
if !apierrors.IsInvalid(err) {
689+
return nil, err
690+
}
691+
if err := r.Client.Status().Update(ctx, ms); err != nil {
692+
return nil, err
693+
}
694+
}
695+
return ms, nil
696+
}
697+
698+
func (r *MachineSetReconciler) getMachineNode(cluster *clusterv1.Cluster, machine *clusterv1.Machine) (*corev1.Node, error) {
699+
c, err := remote.NewClusterClient(r.Client, cluster, r.scheme)
700+
if err != nil {
701+
return nil, err
702+
}
703+
node := &corev1.Node{}
704+
if err := c.Get(context.TODO(), client.ObjectKey{Name: machine.Status.NodeRef.Name}, node); err != nil {
705+
return nil, errors.Wrapf(err, "error retrieving node %s for machine %s/%s", machine.Status.NodeRef.Name, machine.Namespace, machine.Name)
706+
}
707+
return node, nil
708+
}

controllers/machineset_status.go

Lines changed: 0 additions & 152 deletions
This file was deleted.

0 commit comments

Comments
 (0)