Skip to content

Commit 808ca1c

Browse files
authored
Merge pull request #7532 from fabriziopandini/fix-adoption-race
🐛 Adoption for stand-alone objects should ignore objects generated by MachineDeployments
2 parents f9fcf3f + e53b930 commit 808ca1c

File tree

2 files changed

+30
-3
lines changed

2 files changed

+30
-3
lines changed

internal/controllers/machine/machine_controller.go

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -257,7 +257,8 @@ func (r *Reconciler) reconcile(ctx context.Context, cluster *clusterv1.Cluster,
257257
return ctrl.Result{}, errors.Wrapf(err, "error watching nodes on target cluster")
258258
}
259259

260-
// If the Machine belongs to a cluster, add an owner reference.
260+
// If the machine is a stand-alone one, meaning not originated from a MachineDeployment, then set it as directly
261+
// owned by the Cluster (if not already present).
261262
if r.shouldAdopt(m) {
262263
m.OwnerReferences = util.EnsureOwnerRef(m.OwnerReferences, metav1.OwnerReference{
263264
APIVersion: clusterv1.GroupVersion.String(),
@@ -738,8 +739,20 @@ func (r *Reconciler) reconcileDeleteExternal(ctx context.Context, m *clusterv1.M
738739
return obj, nil
739740
}
740741

742+
// shouldAdopt returns true if the Machine should be adopted as a stand-alone Machine directly owned by the Cluster.
741743
func (r *Reconciler) shouldAdopt(m *clusterv1.Machine) bool {
742-
return metav1.GetControllerOf(m) == nil && !util.HasOwner(m.OwnerReferences, clusterv1.GroupVersion.String(), []string{"Cluster"})
744+
// If the Machine is controlled by something (MS or KCP), or if it is a stand-alone Machine directly owned by the Cluster, then no-op.
745+
if metav1.GetControllerOf(m) != nil || util.HasOwner(m.OwnerReferences, clusterv1.GroupVersion.String(), []string{"Cluster"}) {
746+
return false
747+
}
748+
749+
// If the Machine originated from a MachineDeployment, this prevents it from being adopted as a stand-alone Machine.
750+
// Note: this is required because after restore from a backup both the Machine controller and the
751+
// MachineSet controller are racing to adopt Machines, see https://github.com/kubernetes-sigs/cluster-api/issues/7529
752+
if _, ok := m.Labels[clusterv1.MachineDeploymentUniqueLabel]; ok {
753+
return false
754+
}
755+
return true
743756
}
744757

745758
func (r *Reconciler) watchClusterNodes(ctx context.Context, cluster *clusterv1.Cluster) error {

internal/controllers/machineset/machineset_controller.go

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,8 @@ func (r *Reconciler) reconcile(ctx context.Context, cluster *clusterv1.Cluster,
209209
}
210210
machineSet.Labels[clusterv1.ClusterLabelName] = machineSet.Spec.ClusterName
211211

212+
// If the MachineSet is a stand-alone one, meaning not originated from a MachineDeployment, then set it as directly
213+
// owned by the Cluster (if not already present).
212214
if r.shouldAdopt(machineSet) {
213215
machineSet.OwnerReferences = util.EnsureOwnerRef(machineSet.OwnerReferences, metav1.OwnerReference{
214216
APIVersion: clusterv1.GroupVersion.String(),
@@ -637,8 +639,20 @@ func (r *Reconciler) getMachineSetsForMachine(ctx context.Context, m *clusterv1.
637639
return mss, nil
638640
}
639641

642+
// shouldAdopt returns true if the MachineSet should be adopted as a stand-alone MachineSet directly owned by the Cluster.
640643
func (r *Reconciler) shouldAdopt(ms *clusterv1.MachineSet) bool {
641-
return !util.HasOwner(ms.OwnerReferences, clusterv1.GroupVersion.String(), []string{"MachineDeployment", "Cluster"})
644+
// If the MachineSet is controlled by a MachineDeployment, or if it is a stand-alone MachineSet directly owned by the Cluster, then no-op.
645+
if util.HasOwner(ms.OwnerReferences, clusterv1.GroupVersion.String(), []string{"MachineDeployment", "Cluster"}) {
646+
return false
647+
}
648+
649+
// If the MachineSet originated from a MachineDeployment, this prevents it from being adopted as a stand-alone MachineSet.
650+
// Note: this is required because after restore from a backup both the MachineSet controller and the
651+
// MachineDeployment controller are racing to adopt MachineSets, see https://github.com/kubernetes-sigs/cluster-api/issues/7529
652+
if _, ok := ms.Labels[clusterv1.MachineDeploymentUniqueLabel]; ok {
653+
return false
654+
}
655+
return true
642656
}
643657

644658
// updateStatus updates the Status field for the MachineSet

0 commit comments

Comments
 (0)