Skip to content

Commit acd13dc

Browse files
committed
✨ Refactor MS controller status patching
Signed-off-by: Vince Prignano <[email protected]>
1 parent c531036 commit acd13dc

File tree

2 files changed

+121
-165
lines changed

2 files changed

+121
-165
lines changed

controllers/machineset_controller.go

Lines changed: 121 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,8 @@ import (
3434
"k8s.io/client-go/tools/record"
3535
clusterv1 "sigs.k8s.io/cluster-api/api/v1alpha3"
3636
"sigs.k8s.io/cluster-api/controllers/external"
37+
"sigs.k8s.io/cluster-api/controllers/noderefutil"
38+
"sigs.k8s.io/cluster-api/controllers/remote"
3739
"sigs.k8s.io/cluster-api/util"
3840
ctrl "sigs.k8s.io/controller-runtime"
3941
"sigs.k8s.io/controller-runtime/pkg/client"
@@ -85,7 +87,6 @@ func (r *MachineSetReconciler) SetupWithManager(mgr ctrl.Manager, options contro
8587
}
8688

8789
r.recorder = mgr.GetEventRecorderFor("machineset-controller")
88-
8990
r.scheme = mgr.GetScheme()
9091
return nil
9192
}
@@ -161,14 +162,6 @@ func (r *MachineSetReconciler) reconcile(ctx context.Context, machineSet *cluste
161162
machineSet.Spec.Selector.MatchLabels[clusterv1.MachineSetLabelName] = machineSet.Name
162163
machineSet.Spec.Template.Labels[clusterv1.MachineSetLabelName] = machineSet.Name
163164

164-
selector, err := metav1.LabelSelectorAsSelector(&machineSet.Spec.Selector)
165-
if err != nil {
166-
return ctrl.Result{}, errors.Wrapf(err, "failed to parse MachineSet %q label selector", machineSet.Name)
167-
}
168-
// Copy label selector to its status counterpart in string format.
169-
// This is necessary for CRDs including scale subresources.
170-
machineSet.Status.Selector = selector.String()
171-
172165
selectorMap, err := metav1.LabelSelectorAsMap(&machineSet.Spec.Selector)
173166
if err != nil {
174167
return ctrl.Result{}, errors.Wrapf(err, "failed to convert MachineSet %q label selector to a map", machineSet.Name)
@@ -213,16 +206,16 @@ func (r *MachineSetReconciler) reconcile(ctx context.Context, machineSet *cluste
213206
newStatus := r.calculateStatus(ms, filteredMachines)
214207

215208
// Always updates status as machines come up or die.
216-
updatedMS, err := updateMachineSetStatus(r.Client, machineSet, newStatus, logger)
209+
updatedMS, err := r.patchMachineSetStatus(ctx, machineSet, newStatus)
217210
if err != nil {
218211
if syncErr != nil {
219-
return ctrl.Result{}, errors.Wrapf(err, "failed to sync machines: %v. failed to update machine set status", syncErr)
212+
return ctrl.Result{}, errors.Wrapf(err, "failed to sync machines: %v. failed to patch MachineSet's Status", syncErr)
220213
}
221-
return ctrl.Result{}, errors.Wrap(err, "failed to update machine set status")
214+
return ctrl.Result{}, errors.Wrap(err, "failed to patch MachineSet's Status")
222215
}
223216

224217
if syncErr != nil {
225-
return ctrl.Result{}, errors.Wrapf(syncErr, "failed to sync Machineset replicas")
218+
return ctrl.Result{}, errors.Wrapf(syncErr, "failed to sync MachineSet replicas")
226219
}
227220

228221
var replicas int32
@@ -549,3 +542,118 @@ func (r *MachineSetReconciler) hasMatchingLabels(machineSet *clusterv1.MachineSe
549542
func (r *MachineSetReconciler) shouldAdopt(ms *clusterv1.MachineSet) bool {
550543
return !util.HasOwner(ms.OwnerReferences, clusterv1.GroupVersion.String(), []string{"MachineDeployment", "Cluster"})
551544
}
545+
546+
func (r *MachineSetReconciler) calculateStatus(ms *clusterv1.MachineSet, filteredMachines []*clusterv1.Machine) clusterv1.MachineSetStatus {
547+
logger := r.Log.WithValues("machineset", ms.Name, "namespace", ms.Namespace)
548+
newStatus := ms.Status
549+
550+
// Copy label selector to its status counterpart in string format.
551+
// This is necessary for CRDs including scale subresources.
552+
selector, _ := metav1.LabelSelectorAsSelector(&ms.Spec.Selector)
553+
newStatus.Selector = selector.String()
554+
555+
// Count the number of machines that have labels matching the labels of the machine
556+
// template of the replica set, the matching machines may have more
557+
// labels than are in the template. Because the label of machineTemplateSpec is
558+
// a superset of the selector of the replica set, so the possible
559+
// matching machines must be part of the filteredMachines.
560+
fullyLabeledReplicasCount := 0
561+
readyReplicasCount := 0
562+
availableReplicasCount := 0
563+
templateLabel := labels.Set(ms.Spec.Template.Labels).AsSelectorPreValidated()
564+
565+
// Retrieve Cluster, if any.
566+
cluster, _ := util.GetClusterByName(context.Background(), r.Client, ms.ObjectMeta.Namespace, ms.Spec.ClusterName)
567+
568+
for _, machine := range filteredMachines {
569+
if templateLabel.Matches(labels.Set(machine.Labels)) {
570+
fullyLabeledReplicasCount++
571+
}
572+
573+
if machine.Status.NodeRef == nil {
574+
logger.V(2).Info("Unable to retrieve Node status, missing NodeRef", "machine", machine.Name)
575+
continue
576+
}
577+
578+
node, err := r.getMachineNode(cluster, machine)
579+
if err != nil {
580+
logger.Error(err, "Unable to retrieve Node status")
581+
continue
582+
}
583+
584+
if noderefutil.IsNodeReady(node) {
585+
readyReplicasCount++
586+
if noderefutil.IsNodeAvailable(node, ms.Spec.MinReadySeconds, metav1.Now()) {
587+
availableReplicasCount++
588+
}
589+
}
590+
}
591+
592+
newStatus.Replicas = int32(len(filteredMachines))
593+
newStatus.FullyLabeledReplicas = int32(fullyLabeledReplicasCount)
594+
newStatus.ReadyReplicas = int32(readyReplicasCount)
595+
newStatus.AvailableReplicas = int32(availableReplicasCount)
596+
return newStatus
597+
}
598+
599+
// patchMachineSetStatus attempts to update the Status.Replicas of the given MachineSet.
600+
func (r *MachineSetReconciler) patchMachineSetStatus(ctx context.Context, ms *clusterv1.MachineSet, newStatus clusterv1.MachineSetStatus) (*clusterv1.MachineSet, error) {
601+
logger := r.Log.WithValues("machineset", ms.Name, "namespace", ms.Namespace)
602+
603+
// This is the steady state. It happens when the MachineSet doesn't have any expectations, since
604+
// we do a periodic relist every 10 minutes. If the generations differ but the replicas are
605+
// the same, a caller might've resized to the same replica count.
606+
if ms.Status.Replicas == newStatus.Replicas &&
607+
ms.Status.FullyLabeledReplicas == newStatus.FullyLabeledReplicas &&
608+
ms.Status.ReadyReplicas == newStatus.ReadyReplicas &&
609+
ms.Status.AvailableReplicas == newStatus.AvailableReplicas &&
610+
ms.Generation == ms.Status.ObservedGeneration {
611+
return ms, nil
612+
}
613+
614+
patch := client.MergeFrom(ms.DeepCopyObject())
615+
616+
// Save the generation number we acted on, otherwise we might wrongfully indicate
617+
// that we've seen a spec update when we retry.
618+
newStatus.ObservedGeneration = ms.Generation
619+
620+
// Calculate the replicas for logging.
621+
var replicas int32
622+
if ms.Spec.Replicas != nil {
623+
replicas = *ms.Spec.Replicas
624+
}
625+
logger.V(4).Info(fmt.Sprintf("Updating status for %v: %s/%s, ", ms.Kind, ms.Namespace, ms.Name) +
626+
fmt.Sprintf("replicas %d->%d (need %d), ", ms.Status.Replicas, newStatus.Replicas, replicas) +
627+
fmt.Sprintf("fullyLabeledReplicas %d->%d, ", ms.Status.FullyLabeledReplicas, newStatus.FullyLabeledReplicas) +
628+
fmt.Sprintf("readyReplicas %d->%d, ", ms.Status.ReadyReplicas, newStatus.ReadyReplicas) +
629+
fmt.Sprintf("availableReplicas %d->%d, ", ms.Status.AvailableReplicas, newStatus.AvailableReplicas) +
630+
fmt.Sprintf("sequence No: %v->%v", ms.Status.ObservedGeneration, newStatus.ObservedGeneration))
631+
632+
ms.Status = newStatus
633+
if err := r.Client.Status().Patch(ctx, ms, patch); err != nil {
634+
// TODO(vincepri): Try to fix this once we upgrade to CRDv1.
635+
// Our Status.Replicas field is a required non-pointer integer, Go defaults this field to "0" value when decoding
636+
// the data from the API server. For this reason, when we try to write the value "0", the patch is going to think
637+
// the value is already there and shouldn't be patched, making it fail validation.
638+
// Fallback to Update.
639+
if !apierrors.IsInvalid(err) {
640+
return nil, err
641+
}
642+
if err := r.Client.Status().Update(ctx, ms); err != nil {
643+
return nil, err
644+
}
645+
}
646+
return ms, nil
647+
}
648+
649+
func (r *MachineSetReconciler) getMachineNode(cluster *clusterv1.Cluster, machine *clusterv1.Machine) (*corev1.Node, error) {
650+
c, err := remote.NewClusterClient(r.Client, cluster, r.scheme)
651+
if err != nil {
652+
return nil, err
653+
}
654+
node := &corev1.Node{}
655+
if err := c.Get(context.TODO(), client.ObjectKey{Name: machine.Status.NodeRef.Name}, node); err != nil {
656+
return nil, errors.Wrapf(err, "error retrieving node %s for machine %s/%s", machine.Status.NodeRef.Name, machine.Namespace, machine.Name)
657+
}
658+
return node, nil
659+
}

controllers/machineset_status.go

Lines changed: 0 additions & 152 deletions
This file was deleted.

0 commit comments

Comments
 (0)