Skip to content

Commit 0000f09

Browse files
authored
Merge pull request #11320 from fabriziopandini/update-v1beta2-status-kcp-controller
✨ Add more v1beta2 conditions to KCP
2 parents ef64625 + d21132d commit 0000f09

File tree

17 files changed

+1176
-70
lines changed

17 files changed

+1176
-70
lines changed

api/v1beta1/machineset_types.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,7 @@ const (
132132
MachineSetMachinesReadyV1Beta2Condition = MachinesReadyV1Beta2Condition
133133

134134
// MachineSetMachinesReadyNoReplicasV1Beta2Reason surfaces when no machines exist for the MachineSet.
135-
MachineSetMachinesReadyNoReplicasV1Beta2Reason = "NoReplicas"
135+
MachineSetMachinesReadyNoReplicasV1Beta2Reason = NoReplicasV1Beta2Reason
136136

137137
// MachineSetMachinesReadyInternalErrorV1Beta2Reason surfaces unexpected failures when listing machines
138138
// or aggregating machine's conditions.
@@ -146,7 +146,7 @@ const (
146146
MachineSetMachinesUpToDateV1Beta2Condition = MachinesUpToDateV1Beta2Condition
147147

148148
// MachineSetMachinesUpToDateNoReplicasV1Beta2Reason surfaces when no machines exist for the MachineSet.
149-
MachineSetMachinesUpToDateNoReplicasV1Beta2Reason = "NoReplicas"
149+
MachineSetMachinesUpToDateNoReplicasV1Beta2Reason = NoReplicasV1Beta2Reason
150150

151151
// MachineSetMachinesUpToDateInternalErrorV1Beta2Reason surfaces unexpected failures when listing machines
152152
// or aggregating status.

api/v1beta1/v1beta2_condition_consts.go

+11
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,17 @@ const (
102102
// NotScalingDownV1Beta2Reason surfaces when an object is not scaling down.
103103
NotScalingDownV1Beta2Reason = "NotScalingDown"
104104

105+
// RemediatingV1Beta2Reason surfaces when an object owns at least one machine with HealthCheckSucceeded
106+
// set to false and with the OwnerRemediated condition set to false by the MachineHealthCheck controller.
107+
RemediatingV1Beta2Reason = "Remediating"
108+
109+
// NotRemediatingV1Beta2Reason surfaces when an object does not own any machines marked as not healthy
110+
// by the MachineHealthCheck controller.
111+
NotRemediatingV1Beta2Reason = "NotRemediating"
112+
113+
// NoReplicasV1Beta2Reason surfaces when an object that manages replicas does not have any.
114+
NoReplicasV1Beta2Reason = "NoReplicas"
115+
105116
// WaitingForReplicasSetV1Beta2Reason surfaces when the replica field of an object is not set.
106117
WaitingForReplicasSetV1Beta2Reason = "WaitingForReplicasSet"
107118

controlplane/kubeadm/api/v1beta1/v1beta2_condition_consts.go

+47-3
Original file line numberDiff line numberDiff line change
@@ -96,30 +96,74 @@ const (
9696
// Please note this will include also APIServerPodHealthy, ControllerManagerPodHealthy, SchedulerPodHealthy conditions.
9797
// If not using an external etcd also EtcdPodHealthy, EtcdMemberHealthy conditions are included.
9898
KubeadmControlPlaneMachinesReadyV1Beta2Condition = clusterv1.MachinesReadyV1Beta2Condition
99+
100+
// KubeadmControlPlaneMachinesReadyNoReplicasV1Beta2Reason surfaces when no machines exist for the KubeadmControlPlane.
101+
KubeadmControlPlaneMachinesReadyNoReplicasV1Beta2Reason = clusterv1.NoReplicasV1Beta2Reason
102+
103+
// KubeadmControlPlaneMachinesReadyInternalErrorV1Beta2Reason surfaces unexpected failures when computing the MachinesReady condition.
104+
KubeadmControlPlaneMachinesReadyInternalErrorV1Beta2Reason = clusterv1.InternalErrorV1Beta2Reason
99105
)
100106

101107
// KubeadmControlPlane's MachinesUpToDate condition and corresponding reasons that will be used in v1Beta2 API version.
102108
const (
103109
// KubeadmControlPlaneMachinesUpToDateV1Beta2Condition surfaces details of controlled machines not up to date, if any.
104110
KubeadmControlPlaneMachinesUpToDateV1Beta2Condition = clusterv1.MachinesUpToDateV1Beta2Condition
111+
112+
// KubeadmControlPlaneMachinesUpToDateNoReplicasV1Beta2Reason surfaces when no machines exist for the KubeadmControlPlane.
113+
KubeadmControlPlaneMachinesUpToDateNoReplicasV1Beta2Reason = clusterv1.NoReplicasV1Beta2Reason
114+
115+
// KubeadmControlPlaneMachinesUpToDateInternalErrorV1Beta2Reason surfaces unexpected failures when computing the MachinesUpToDate condition.
116+
KubeadmControlPlaneMachinesUpToDateInternalErrorV1Beta2Reason = clusterv1.InternalErrorV1Beta2Reason
105117
)
106118

107119
// KubeadmControlPlane's ScalingUp condition and corresponding reasons that will be used in v1Beta2 API version.
108120
const (
109-
// KubeadmControlPlaneScalingUpV1Beta2Condition is true if available replicas < desired replicas.
121+
// KubeadmControlPlaneScalingUpV1Beta2Condition is true if actual replicas < desired replicas.
110122
KubeadmControlPlaneScalingUpV1Beta2Condition = clusterv1.ScalingUpV1Beta2Condition
123+
124+
// KubeadmControlPlaneScalingUpV1Beta2Reason surfaces when actual replicas < desired replicas.
125+
KubeadmControlPlaneScalingUpV1Beta2Reason = clusterv1.ScalingUpV1Beta2Reason
126+
127+
// KubeadmControlPlaneNotScalingUpV1Beta2Reason surfaces when actual replicas >= desired replicas.
128+
KubeadmControlPlaneNotScalingUpV1Beta2Reason = clusterv1.NotScalingUpV1Beta2Reason
129+
130+
// KubeadmControlPlaneScalingUpWaitingForReplicasSetV1Beta2Reason surfaces when the .spec.replicas
131+
// field of the KubeadmControlPlane is not set.
132+
KubeadmControlPlaneScalingUpWaitingForReplicasSetV1Beta2Reason = clusterv1.WaitingForReplicasSetV1Beta2Reason
111133
)
112134

113135
// KubeadmControlPlane's ScalingDown condition and corresponding reasons that will be used in v1Beta2 API version.
114136
const (
115-
// KubeadmControlPlaneScalingDownV1Beta2Condition is true if replicas > desired replicas.
137+
// KubeadmControlPlaneScalingDownV1Beta2Condition is true if actual replicas > desired replicas.
116138
KubeadmControlPlaneScalingDownV1Beta2Condition = clusterv1.ScalingDownV1Beta2Condition
139+
140+
// KubeadmControlPlaneScalingDownV1Beta2Reason surfaces when actual replicas > desired replicas.
141+
KubeadmControlPlaneScalingDownV1Beta2Reason = clusterv1.ScalingDownV1Beta2Reason
142+
143+
// KubeadmControlPlaneNotScalingDownV1Beta2Reason surfaces when actual replicas <= desired replicas.
144+
KubeadmControlPlaneNotScalingDownV1Beta2Reason = clusterv1.NotScalingDownV1Beta2Reason
145+
146+
// KubeadmControlPlaneScalingDownWaitingForReplicasSetV1Beta2Reason surfaces when the .spec.replicas
147+
// field of the KubeadmControlPlane is not set.
148+
KubeadmControlPlaneScalingDownWaitingForReplicasSetV1Beta2Reason = clusterv1.WaitingForReplicasSetV1Beta2Reason
117149
)
118150

119151
// KubeadmControlPlane's Remediating condition and corresponding reasons that will be used in v1Beta2 API version.
120152
const (
121153
// KubeadmControlPlaneRemediatingV1Beta2Condition surfaces details about ongoing remediation of the controlled machines, if any.
154+
// Note: KubeadmControlPlane only remediates machines with HealthCheckSucceeded set to false and with the OwnerRemediated condition set to false.
122155
KubeadmControlPlaneRemediatingV1Beta2Condition = clusterv1.RemediatingV1Beta2Condition
156+
157+
// KubeadmControlPlaneRemediatingV1Beta2Reason surfaces when kcp has at least one machine with HealthCheckSucceeded set to false
158+
// and with the OwnerRemediated condition set to false.
159+
KubeadmControlPlaneRemediatingV1Beta2Reason = clusterv1.RemediatingV1Beta2Reason
160+
161+
// KubeadmControlPlaneNotRemediatingV1Beta2Reason surfaces when kcp does not have any machine with HealthCheckSucceeded set to false
162+
// and with the OwnerRemediated condition set to false.
163+
KubeadmControlPlaneNotRemediatingV1Beta2Reason = clusterv1.NotRemediatingV1Beta2Reason
164+
165+
// KubeadmControlPlaneRemediatingInternalErrorV1Beta2Reason surfaces unexpected failures when computing the Remediating condition.
166+
KubeadmControlPlaneRemediatingInternalErrorV1Beta2Reason = clusterv1.InternalErrorV1Beta2Reason
123167
)
124168

125169
// Reasons that will be used for the OwnerRemediated condition set by MachineHealthCheck on KubeadmControlPlane controlled machines
@@ -201,7 +245,7 @@ const (
201245
// EtcdMemberHealthy condition and corresponding reasons that will be used for KubeadmControlPlane controlled machines in v1Beta2 API version.
202246
const (
203247
// KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Condition surfaces the status of the etcd member hosted on a KubeadmControlPlane controlled machine.
204-
KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Condition = "Healthy"
248+
KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Condition = "EtcdMemberHealthy"
205249

206250
// KubeadmControlPlaneMachineEtcdMemberNotHealthyV1Beta2Reason surfaces when the etcd member hosted on a KubeadmControlPlane controlled machine is not healthy.
207251
KubeadmControlPlaneMachineEtcdMemberNotHealthyV1Beta2Reason = "NotHealthy"

controlplane/kubeadm/internal/control_plane.go

+27-10
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,12 @@ type ControlPlane struct {
4747
// reconciliationTime is the time of the current reconciliation, and should be used for all "now" calculations
4848
reconciliationTime metav1.Time
4949

50+
// InfraMachineTemplateIsNotFound is true if getting the infra machine template object failed with a NotFound error
51+
InfraMachineTemplateIsNotFound bool
52+
53+
// PreflightCheckResults contains a description of the preflight check results blocking machine creation or deletion.
54+
PreflightCheckResults PreflightCheckResults
55+
5056
// TODO: we should see if we can combine these with the Machine objects so we don't have all these separate lookups
5157
// See discussion on https://github.com/kubernetes-sigs/cluster-api/pull/3405
5258
KubeadmConfigs map[string]*bootstrapv1.KubeadmConfig
@@ -56,6 +62,16 @@ type ControlPlane struct {
5662
workloadCluster WorkloadCluster
5763
}
5864

65+
// PreflightCheckResults contains a description of the preflight check results blocking machine creation or deletion.
66+
type PreflightCheckResults struct {
67+
// HasDeletingMachine reports true if preflight check detected a deleting machine.
68+
HasDeletingMachine bool
69+
// ControlPlaneComponentsNotHealthy reports true if preflight check detected that the control plane components are not fully healthy.
70+
ControlPlaneComponentsNotHealthy bool
71+
// EtcdClusterNotHealthy reports true if preflight check detected that the etcd cluster is not fully healthy.
72+
EtcdClusterNotHealthy bool
73+
}
74+
5975
// NewControlPlane returns an instantiated ControlPlane.
6076
func NewControlPlane(ctx context.Context, managementCluster ManagementCluster, client client.Client, cluster *clusterv1.Cluster, kcp *controlplanev1.KubeadmControlPlane, ownedMachines collections.Machines) (*ControlPlane, error) {
6177
infraObjects, err := getInfraResources(ctx, client, ownedMachines)
@@ -260,24 +276,25 @@ func (c *ControlPlane) UnhealthyMachinesWithUnhealthyControlPlaneComponents(mach
260276
return machines.Filter(collections.HasUnhealthyControlPlaneComponents(c.IsEtcdManaged()))
261277
}
262278

263-
// UnhealthyMachinesByMachineHealthCheck returns the list of control plane machines marked as unhealthy by Machine Health Check.
264-
func (c *ControlPlane) UnhealthyMachinesByMachineHealthCheck() collections.Machines {
265-
return c.Machines.Filter(collections.HasUnhealthyCondition)
279+
// UnhealthyMachines returns the list of control plane machines marked as unhealthy by MHC, no matter
280+
// if they are set to be remediated by KCP or not.
281+
func (c *ControlPlane) UnhealthyMachines() collections.Machines {
282+
return c.Machines.Filter(collections.IsUnhealthy)
266283
}
267284

268-
// HealthyMachinesByMachineHealthCheck returns the list of control plane machines not marked as unhealthy by Machine Health Check.
269-
func (c *ControlPlane) HealthyMachinesByMachineHealthCheck() collections.Machines {
270-
return c.Machines.Filter(collections.Not(collections.HasUnhealthyCondition))
285+
// HealthyMachines returns the list of control plane machines marked as healthy by MHC (or not targeted by any MHC instance).
286+
func (c *ControlPlane) HealthyMachines() collections.Machines {
287+
return c.Machines.Filter(collections.Not(collections.IsUnhealthy))
271288
}
272289

273-
// HasUnhealthyMachineByMachineHealthCheck returns true if any machine in the control plane is marked as unhealthy by Machine Health Check.
274-
func (c *ControlPlane) HasUnhealthyMachineByMachineHealthCheck() bool {
275-
return len(c.UnhealthyMachinesByMachineHealthCheck()) > 0
290+
// MachinesToBeRemediatedByKCP returns the list of control plane machines to be remediated by KCP.
291+
func (c *ControlPlane) MachinesToBeRemediatedByKCP() collections.Machines {
292+
return c.Machines.Filter(collections.IsUnhealthyAndOwnerRemediated)
276293
}
277294

278295
// HasHealthyMachineStillProvisioning returns true if any healthy machine in the control plane is still in the process of being provisioned.
279296
func (c *ControlPlane) HasHealthyMachineStillProvisioning() bool {
280-
return len(c.HealthyMachinesByMachineHealthCheck().Filter(collections.Not(collections.HasNode()))) > 0
297+
return len(c.HealthyMachines().Filter(collections.Not(collections.HasNode()))) > 0
281298
}
282299

283300
// PatchMachines patches all the machines conditions.

controlplane/kubeadm/internal/control_plane_test.go

+33-12
Original file line numberDiff line numberDiff line change
@@ -63,45 +63,66 @@ func TestControlPlane(t *testing.T) {
6363
})
6464
}
6565

66-
func TestHasUnhealthyMachine(t *testing.T) {
66+
func TestHasMachinesToBeRemediated(t *testing.T) {
6767
// healthy machine (without MachineHealthCheckSucceeded condition)
68-
healthyMachine1 := &clusterv1.Machine{ObjectMeta: metav1.ObjectMeta{Name: "healthyMachine1"}}
68+
healthyMachineNotProvisioned := &clusterv1.Machine{ObjectMeta: metav1.ObjectMeta{Name: "healthyMachine1"}}
6969
// healthy machine (with MachineHealthCheckSucceeded == true)
70-
healthyMachine2 := &clusterv1.Machine{ObjectMeta: metav1.ObjectMeta{Name: "healthyMachine2"}}
71-
conditions.MarkTrue(healthyMachine2, clusterv1.MachineHealthCheckSucceededCondition)
70+
healthyMachineProvisioned := &clusterv1.Machine{ObjectMeta: metav1.ObjectMeta{Name: "healthyMachine2"}, Status: clusterv1.MachineStatus{NodeRef: &corev1.ObjectReference{Kind: "Node", Name: "node1"}}}
71+
conditions.MarkTrue(healthyMachineProvisioned, clusterv1.MachineHealthCheckSucceededCondition)
7272
// unhealthy machine NOT eligible for KCP remediation (with MachineHealthCheckSucceeded == False, but without MachineOwnerRemediated condition)
73-
unhealthyMachineNOTOwnerRemediated := &clusterv1.Machine{ObjectMeta: metav1.ObjectMeta{Name: "unhealthyMachineNOTOwnerRemediated"}}
73+
unhealthyMachineNOTOwnerRemediated := &clusterv1.Machine{ObjectMeta: metav1.ObjectMeta{Name: "unhealthyMachineNOTOwnerRemediated"}, Status: clusterv1.MachineStatus{NodeRef: &corev1.ObjectReference{Kind: "Node", Name: "node2"}}}
7474
conditions.MarkFalse(unhealthyMachineNOTOwnerRemediated, clusterv1.MachineHealthCheckSucceededCondition, clusterv1.MachineHasFailureReason, clusterv1.ConditionSeverityWarning, "Something is wrong")
7575
// unhealthy machine eligible for KCP remediation (with MachineHealthCheckSucceeded == False, with MachineOwnerRemediated condition)
76-
unhealthyMachineOwnerRemediated := &clusterv1.Machine{ObjectMeta: metav1.ObjectMeta{Name: "unhealthyMachineOwnerRemediated"}}
76+
unhealthyMachineOwnerRemediated := &clusterv1.Machine{ObjectMeta: metav1.ObjectMeta{Name: "unhealthyMachineOwnerRemediated"}, Status: clusterv1.MachineStatus{NodeRef: &corev1.ObjectReference{Kind: "Node", Name: "node3"}}}
7777
conditions.MarkFalse(unhealthyMachineOwnerRemediated, clusterv1.MachineHealthCheckSucceededCondition, clusterv1.MachineHasFailureReason, clusterv1.ConditionSeverityWarning, "Something is wrong")
7878
conditions.MarkFalse(unhealthyMachineOwnerRemediated, clusterv1.MachineOwnerRemediatedCondition, clusterv1.WaitingForRemediationReason, clusterv1.ConditionSeverityWarning, "KCP should remediate this issue")
7979

8080
t.Run("One unhealthy machine to be remediated by KCP", func(t *testing.T) {
8181
c := ControlPlane{
8282
Machines: collections.FromMachines(
83-
healthyMachine1, // healthy machine, should be ignored
84-
healthyMachine2, // healthy machine, should be ignored (the MachineHealthCheckSucceededCondition is true)
83+
healthyMachineNotProvisioned, // healthy machine, should be ignored
84+
healthyMachineProvisioned, // healthy machine, should be ignored (the MachineHealthCheckSucceededCondition is true)
8585
unhealthyMachineNOTOwnerRemediated, // unhealthy machine, but KCP should not remediate it, should be ignored.
8686
unhealthyMachineOwnerRemediated,
8787
),
8888
}
8989

9090
g := NewWithT(t)
91-
g.Expect(c.HasUnhealthyMachineByMachineHealthCheck()).To(BeTrue())
91+
g.Expect(c.MachinesToBeRemediatedByKCP()).To(ConsistOf(unhealthyMachineOwnerRemediated))
92+
g.Expect(c.UnhealthyMachines()).To(ConsistOf(unhealthyMachineOwnerRemediated, unhealthyMachineNOTOwnerRemediated))
93+
g.Expect(c.HealthyMachines()).To(ConsistOf(healthyMachineNotProvisioned, healthyMachineProvisioned))
94+
g.Expect(c.HasHealthyMachineStillProvisioning()).To(BeTrue())
9295
})
9396

9497
t.Run("No unhealthy machine to be remediated by KCP", func(t *testing.T) {
9598
c := ControlPlane{
9699
Machines: collections.FromMachines(
97-
healthyMachine1, // healthy machine, should be ignored
98-
healthyMachine2, // healthy machine, should be ignored (the MachineHealthCheckSucceededCondition is true)
100+
healthyMachineNotProvisioned, // healthy machine, should be ignored
101+
healthyMachineProvisioned, // healthy machine, should be ignored (the MachineHealthCheckSucceededCondition is true)
99102
unhealthyMachineNOTOwnerRemediated, // unhealthy machine, but KCP should not remediate it, should be ignored.
100103
),
101104
}
102105

103106
g := NewWithT(t)
104-
g.Expect(c.HasUnhealthyMachineByMachineHealthCheck()).To(BeFalse())
107+
g.Expect(c.MachinesToBeRemediatedByKCP()).To(BeEmpty())
108+
g.Expect(c.UnhealthyMachines()).To(ConsistOf(unhealthyMachineNOTOwnerRemediated))
109+
g.Expect(c.HealthyMachines()).To(ConsistOf(healthyMachineNotProvisioned, healthyMachineProvisioned))
110+
g.Expect(c.HasHealthyMachineStillProvisioning()).To(BeTrue())
111+
})
112+
113+
t.Run("No unhealthy machine to be remediated by KCP", func(t *testing.T) {
114+
c := ControlPlane{
115+
Machines: collections.FromMachines(
116+
healthyMachineProvisioned, // healthy machine, should be ignored (the MachineHealthCheckSucceededCondition is true)
117+
unhealthyMachineNOTOwnerRemediated, // unhealthy machine, but KCP should not remediate it, should be ignored.
118+
),
119+
}
120+
121+
g := NewWithT(t)
122+
g.Expect(c.MachinesToBeRemediatedByKCP()).To(BeEmpty())
123+
g.Expect(c.UnhealthyMachines()).To(ConsistOf(unhealthyMachineNOTOwnerRemediated))
124+
g.Expect(c.HealthyMachines()).To(ConsistOf(healthyMachineProvisioned))
125+
g.Expect(c.HasHealthyMachineStillProvisioning()).To(BeFalse())
105126
})
106127
}
107128

0 commit comments

Comments
 (0)