Skip to content

Commit 7803764

Browse files
Address comments
1 parent 88bcbf0 commit 7803764

File tree

6 files changed

+116
-101
lines changed

6 files changed

+116
-101
lines changed

controlplane/kubeadm/api/v1beta1/v1beta2_condition_consts.go

+13-5
Original file line numberDiff line numberDiff line change
@@ -20,25 +20,33 @@ import clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
2020

2121
// KubeadmControlPlane's Available condition and corresponding reasons that will be used in v1Beta2 API version.
2222
const (
23-
// KubeadmControlPlaneAvailableV1Beta2Condition is True if KubeadmControlPlane not delete, `CertificatesAvailable` is true,
24-
// at least one Kubernetes API server, scheduler and controller manager control plane are healthy,
23+
// KubeadmControlPlaneAvailableV1Beta2Condition is true if KubeadmControlPlane not deleted, `CertificatesAvailable` is true,
24+
// at least one Machine with Kubernetes API server, scheduler and controller manager healthy,
2525
// and etcd has enough operational members to meet quorum requirements.
26+
// More specifically, considering how kubeadm layouts components:
27+
// - Kubernetes API server, scheduler and controller manager health is inferred by the status of
28+
// the corresponding Pods hosted on each machine.
29+
// - In case of managed etcd, also an healthy etcd Pod and an healthy etcd member must exist on the same
30+
// machine with the healthy Kubernetes API server, scheduler and controller manager, otherwise the k8s control
31+
// plane cannot be considered operational (if etcd is not operational on machine, most likely also API server,
32+
// scheduler and controller manager on the same machine will be impacted).
33+
// - In case of external etcd, KCP cannot make any assumption on etcd status, so all the etcd checks are skipped.
2634
KubeadmControlPlaneAvailableV1Beta2Condition = clusterv1.AvailableV1Beta2Condition
2735

2836
// KubeadmControlPlaneAvailableInspectionFailedV1Beta2Reason documents a failure when inspecting the status of the
2937
// etcd cluster hosted on KubeadmControlPlane controlled machines.
3038
KubeadmControlPlaneAvailableInspectionFailedV1Beta2Reason = clusterv1.InspectionFailedV1Beta2Reason
3139

32-
// KubeadmControlPlaneAvailableV1Beta2Reason surfaces when a Deployment is available.
40+
// KubeadmControlPlaneAvailableV1Beta2Reason surfaces when the KubeadmControlPlane is available.
3341
KubeadmControlPlaneAvailableV1Beta2Reason = clusterv1.AvailableV1Beta2Reason
3442

35-
// KubeadmControlPlaneNotAvailableV1Beta2Reason surfaces when a Deployment is not available.
43+
// KubeadmControlPlaneNotAvailableV1Beta2Reason surfaces when the KubeadmControlPlane is not available.
3644
KubeadmControlPlaneNotAvailableV1Beta2Reason = clusterv1.NotAvailableV1Beta2Reason
3745
)
3846

3947
// KubeadmControlPlane's Initialized condition and corresponding reasons that will be used in v1Beta2 API version.
4048
const (
41-
// KubeadmControlPlaneInitializedV1Beta2Condition is True when the control plane is functional enough to accept
49+
// KubeadmControlPlaneInitializedV1Beta2Condition is true when the control plane is functional enough to accept
4250
// requests. This information is usually used as a signal for starting all the provisioning operations that
4351
// depend on a functional API server, but do not require a full HA control plane to exist.
4452
KubeadmControlPlaneInitializedV1Beta2Condition = "Initialized"

controlplane/kubeadm/internal/control_plane.go

+3-1
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,9 @@ type ControlPlane struct {
6161

6262
// EtcdMembers is the list of members read while computing reconcileControlPlaneConditions; also additional info below
6363
// comes from the same func.
64-
// NOTE: Those info are computed on what we know, so we can reason about availability eve if with a certain degree of problems in the cluster
64+
// NOTE: Those info are computed based on the info KCP was able to collect during inspection (e.g. if on a 3 CP
65+
// control plane one etcd member is down, those info are based on the answer collected from two members only).
66+
// NOTE: Those info are specifically designed for computing KCP's Available condition.
6567
EtcdMembers []*etcd.Member
6668
EtcdMembersAgreeOnMemberList bool
6769
EtcdMembersAgreeOnClusterID bool

controlplane/kubeadm/internal/controllers/status.go

+44-42
Original file line numberDiff line numberDiff line change
@@ -424,7 +424,7 @@ func setRemediatingCondition(ctx context.Context, kcp *controlplanev1.KubeadmCon
424424
})
425425
}
426426

427-
func setAvailableCondition(_ context.Context, kcp *controlplanev1.KubeadmControlPlane, etcdIsManaged bool, etcdMembers []*etcd.Member, etcdMembersAgreeOnMemberList bool, etcdMembersAgreeOnClusterID bool, etcdMembersAndMachinesAreMatching bool, machines collections.Machines) {
427+
func setAvailableCondition(_ context.Context, kcp *controlplanev1.KubeadmControlPlane, etcdIsManaged bool, etcdMembers []*etcd.Member, etcdMembersAgreeOnMemberList, etcdMembersAgreeOnClusterID, etcdMembersAndMachinesAreMatching bool, machines collections.Machines) {
428428
if !kcp.Status.Initialized {
429429
v1beta2conditions.Set(kcp, metav1.Condition{
430430
Type: controlplanev1.KubeadmControlPlaneAvailableV1Beta2Condition,
@@ -435,50 +435,52 @@ func setAvailableCondition(_ context.Context, kcp *controlplanev1.KubeadmControl
435435
return
436436
}
437437

438-
if etcdIsManaged && etcdMembers == nil {
439-
v1beta2conditions.Set(kcp, metav1.Condition{
440-
Type: controlplanev1.KubeadmControlPlaneAvailableV1Beta2Condition,
441-
Status: metav1.ConditionUnknown,
442-
Reason: controlplanev1.KubeadmControlPlaneAvailableInspectionFailedV1Beta2Reason,
443-
Message: "Failed to get etcd members",
444-
})
445-
return
446-
}
438+
if etcdIsManaged {
439+
if etcdMembers == nil {
440+
v1beta2conditions.Set(kcp, metav1.Condition{
441+
Type: controlplanev1.KubeadmControlPlaneAvailableV1Beta2Condition,
442+
Status: metav1.ConditionUnknown,
443+
Reason: controlplanev1.KubeadmControlPlaneAvailableInspectionFailedV1Beta2Reason,
444+
Message: "Failed to get etcd members",
445+
})
446+
return
447+
}
447448

448-
if etcdIsManaged && !etcdMembersAgreeOnMemberList {
449-
v1beta2conditions.Set(kcp, metav1.Condition{
450-
Type: controlplanev1.KubeadmControlPlaneAvailableV1Beta2Condition,
451-
Status: metav1.ConditionFalse,
452-
Reason: controlplanev1.KubeadmControlPlaneNotAvailableV1Beta2Reason,
453-
Message: "At least one etcd member reports a list of etcd members different than the list reported by other members",
454-
})
455-
return
456-
}
449+
if !etcdMembersAgreeOnMemberList {
450+
v1beta2conditions.Set(kcp, metav1.Condition{
451+
Type: controlplanev1.KubeadmControlPlaneAvailableV1Beta2Condition,
452+
Status: metav1.ConditionFalse,
453+
Reason: controlplanev1.KubeadmControlPlaneNotAvailableV1Beta2Reason,
454+
Message: "At least one etcd member reports a list of etcd members different than the list reported by other members",
455+
})
456+
return
457+
}
457458

458-
if etcdIsManaged && !etcdMembersAgreeOnClusterID {
459-
v1beta2conditions.Set(kcp, metav1.Condition{
460-
Type: controlplanev1.KubeadmControlPlaneAvailableV1Beta2Condition,
461-
Status: metav1.ConditionFalse,
462-
Reason: controlplanev1.KubeadmControlPlaneNotAvailableV1Beta2Reason,
463-
Message: "At least one etcd member reports a cluster ID different than the cluster ID reported by other members",
464-
})
465-
return
466-
}
459+
if !etcdMembersAgreeOnClusterID {
460+
v1beta2conditions.Set(kcp, metav1.Condition{
461+
Type: controlplanev1.KubeadmControlPlaneAvailableV1Beta2Condition,
462+
Status: metav1.ConditionFalse,
463+
Reason: controlplanev1.KubeadmControlPlaneNotAvailableV1Beta2Reason,
464+
Message: "At least one etcd member reports a cluster ID different than the cluster ID reported by other members",
465+
})
466+
return
467+
}
467468

468-
if etcdIsManaged && !etcdMembersAndMachinesAreMatching {
469-
v1beta2conditions.Set(kcp, metav1.Condition{
470-
Type: controlplanev1.KubeadmControlPlaneAvailableV1Beta2Condition,
471-
Status: metav1.ConditionFalse,
472-
Reason: controlplanev1.KubeadmControlPlaneNotAvailableV1Beta2Reason,
473-
Message: "The list of etcd members does not match the list of Machines and Nodes",
474-
})
475-
return
469+
if !etcdMembersAndMachinesAreMatching {
470+
v1beta2conditions.Set(kcp, metav1.Condition{
471+
Type: controlplanev1.KubeadmControlPlaneAvailableV1Beta2Condition,
472+
Status: metav1.ConditionFalse,
473+
Reason: controlplanev1.KubeadmControlPlaneNotAvailableV1Beta2Reason,
474+
Message: "The list of etcd members does not match the list of Machines and Nodes",
475+
})
476+
return
477+
}
476478
}
477479

478480
// Determine control plane availability looking at machines conditions, which at this stage are
479481
// already surfacing status from etcd member and all control plane pods hosted on every machine.
480-
// Note: we intentionally use the number of etcd members for determine the etcd quorum because
481-
// etcd members could not match with machines, e.g. while provisioning a new machine.
482+
// Note: we intentionally use the number of etcd members to determine the etcd quorum because
483+
// etcd members might not match with machines, e.g. while provisioning a new machine.
482484
etcdQuorum := (len(etcdMembers) / 2.0) + 1
483485
k8sControlPlaneHealthy := 0
484486
etcdMembersHealthy := 0
@@ -537,16 +539,16 @@ func setAvailableCondition(_ context.Context, kcp *controlplanev1.KubeadmControl
537539
if etcdIsManaged && etcdMembersHealthy < etcdQuorum {
538540
switch etcdMembersHealthy {
539541
case 0:
540-
messages = append(messages, fmt.Sprintf("There are no healthy etcd member, at least %d required", etcdQuorum))
542+
messages = append(messages, fmt.Sprintf("There are no healthy etcd member, at least %d required for etcd quorum", etcdQuorum))
541543
case 1:
542-
messages = append(messages, fmt.Sprintf("There is 1 healthy etcd member, at least %d required", etcdQuorum))
544+
messages = append(messages, fmt.Sprintf("There is 1 healthy etcd member, at least %d required for etcd quorum", etcdQuorum))
543545
default:
544-
messages = append(messages, fmt.Sprintf("There are %d healthy etcd members, at least %d required", etcdMembersHealthy, etcdQuorum))
546+
messages = append(messages, fmt.Sprintf("There are %d healthy etcd members, at least %d required for etcd quorum", etcdMembersHealthy, etcdQuorum))
545547
}
546548
}
547549

548550
if k8sControlPlaneHealthy < 1 {
549-
messages = append(messages, "There are no healthy control plane instances, at least 1 required")
551+
messages = append(messages, "There are no Machines with healthy control plane components, at least 1 required")
550552
}
551553

552554
v1beta2conditions.Set(kcp, metav1.Condition{

0 commit comments

Comments
 (0)