🌱Add Machine and KCP conditions to KCP controller #3674
Changes from 10 commits
@@ -221,6 +221,7 @@ func patchKubeadmControlPlane(ctx context.Context, patchHelper *patch.Helper, kc
controlplanev1.MachinesReadyCondition,
controlplanev1.AvailableCondition,
controlplanev1.CertificatesAvailableCondition,
controlplanev1.EtcdClusterHealthy,
),
)
@@ -289,21 +290,25 @@ func (r *KubeadmControlPlaneReconciler) reconcile(ctx context.Context, cluster *
return ctrl.Result{}, err
}

ownedMachines := controlPlaneMachines.Filter(machinefilters.OwnedMachines(kcp))
if len(ownedMachines) != len(controlPlaneMachines) {
logger.Info("Not all control plane machines are owned by this KubeadmControlPlane, refusing to operate in mixed management mode")
return ctrl.Result{}, nil
}

controlPlane, err := internal.NewControlPlane(ctx, r.Client, cluster, kcp, ownedMachines)
if err != nil {
controlPlane, err := r.createControlPlane(ctx, cluster, kcp)
if controlPlane == nil || err != nil {
logger.Error(err, "failed to initialize control plane")
return ctrl.Result{}, err
}
if len(controlPlane.Machines) != len(controlPlaneMachines) {
logger.Info("Not all control plane machines are owned by this KubeadmControlPlane, refusing to operate in mixed management mode")
return ctrl.Result{}, nil
}

// Aggregate the operational state of all the machines; while aggregating we are adding the
// source ref (reason@machine/name) so the problem can be easily tracked down to its source machine.
conditions.SetAggregate(controlPlane.KCP, controlplanev1.MachinesReadyCondition, ownedMachines.ConditionGetters(), conditions.AddSourceRef())
conditions.SetAggregate(controlPlane.KCP, controlplanev1.MachinesReadyCondition, controlPlane.Machines.ConditionGetters(), conditions.AddSourceRef())

// reconcileControlPlaneHealth returns err if there is a machine being deleted or control plane is unhealthy.
// If control plane is not initialized, then control-plane machines will be empty and hence health check will not fail.
if result, err := r.reconcileControlPlaneHealth(ctx, cluster, kcp, controlPlane); err != nil || !result.IsZero() {
return result, err
}

// Control plane machines rollout due to configuration changes (e.g. upgrades) takes precedence over other operations.
needRollout := controlPlane.MachinesNeedingRollout()
@@ -324,7 +329,7 @@ func (r *KubeadmControlPlaneReconciler) reconcile(ctx context.Context, cluster *
}

// If we've made it this far, we can assume that all ownedMachines are up to date
numMachines := len(ownedMachines)
numMachines := len(controlPlane.Machines)
desiredReplicas := int(*kcp.Spec.Replicas)

switch {
@@ -372,13 +377,42 @@ func (r *KubeadmControlPlaneReconciler) reconcile(ctx context.Context, cluster *
return ctrl.Result{}, nil
}

func (r *KubeadmControlPlaneReconciler) createControlPlane(ctx context.Context, cluster *clusterv1.Cluster, kcp *controlplanev1.KubeadmControlPlane) (*internal.ControlPlane, error) {
Reviewer: This method is a little misleading. If the internal control plane struct requires more information, can we enrich

Author: This is getting the owned machines and then creating the ControlPlane; it was added because we will do the same thing in both reconcile and reconcileDelete. Although the ControlPlane struct is enough as it is, ownedMachines are also calculated here. I think this is a good abstraction, but maybe bad naming?
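For illustration only, a minimal sketch of the alternative raised in this thread: fold the machine lookup that reconcile and reconcileDelete would otherwise duplicate into a single constructor-style helper instead of a separate createControlPlane method. This is not what the PR implements; the newControlPlaneWithMachines name and the machineFetcher interface below are hypothetical, and the collection type is assumed, while the individual calls are the ones already visible in this diff.

// Illustrative sketch only (not part of this PR). machineFetcher is a
// hypothetical narrow interface assumed to be satisfied by the reconciler's
// managementCluster; FilterableMachineCollection is assumed to be the type
// returned by GetMachinesForCluster.
type machineFetcher interface {
	GetMachinesForCluster(ctx context.Context, cluster client.ObjectKey, filters ...machinefilters.Func) (internal.FilterableMachineCollection, error)
}

func newControlPlaneWithMachines(ctx context.Context, c client.Client, fetcher machineFetcher, cluster *clusterv1.Cluster, kcp *controlplanev1.KubeadmControlPlane) (*internal.ControlPlane, error) {
	// Fetch all control plane machines for the cluster, then keep only the ones owned by this KCP.
	machines, err := fetcher.GetMachinesForCluster(ctx, util.ObjectKey(cluster), machinefilters.ControlPlaneMachines(cluster.Name))
	if err != nil {
		return nil, errors.Wrap(err, "failed to retrieve control plane machines for cluster")
	}
	ownedMachines := machines.Filter(machinefilters.OwnedMachines(kcp))

	// Delegate to the existing constructor with the pre-filtered machines.
	return internal.NewControlPlane(ctx, c, cluster, kcp, ownedMachines)
}

With something like this, both reconcile and reconcileDelete would call one helper and the "create" naming question goes away; the trade-off is that the lookup logic moves away from the reconciler, where the logging context currently lives.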
logger := r.Log.WithValues("namespace", kcp.Namespace, "kubeadmControlPlane", kcp.Name, "cluster", cluster.Name)

controlPlaneMachines, err := r.managementCluster.GetMachinesForCluster(ctx, util.ObjectKey(cluster), machinefilters.ControlPlaneMachines(cluster.Name))
if err != nil {
logger.Error(err, "failed to retrieve control plane machines for cluster")
return nil, err
}
ownedMachines := controlPlaneMachines.Filter(machinefilters.OwnedMachines(kcp))

controlPlane, err := internal.NewControlPlane(ctx, r.Client, cluster, kcp, ownedMachines)
if err != nil {
logger.Error(err, "failed to initialize control plane")
return nil, err
}
return controlPlane, nil
}

// reconcileDelete handles KubeadmControlPlane deletion.
// The implementation does not take non-control plane workloads into consideration. This may or may not change in the future.
// Please see https://github.com/kubernetes-sigs/cluster-api/issues/2064.
func (r *KubeadmControlPlaneReconciler) reconcileDelete(ctx context.Context, cluster *clusterv1.Cluster, kcp *controlplanev1.KubeadmControlPlane) (ctrl.Result, error) {
logger := r.Log.WithValues("namespace", kcp.Namespace, "kubeadmControlPlane", kcp.Name, "cluster", cluster.Name)
logger.Info("Reconcile KubeadmControlPlane deletion")

controlPlane, err := r.createControlPlane(ctx, cluster, kcp)
if controlPlane == nil || err != nil {
logger.Error(err, "failed to initialize control plane")
return ctrl.Result{}, err
}

// Ignore the health check results here as well as the errors; they are used to set health-related conditions on Machines.
// Errors may be due to not being able to get workload cluster nodes.
r.managementCluster.TargetClusterControlPlaneHealthCheck(ctx, controlPlane, util.ObjectKey(cluster)) //nolint
r.managementCluster.TargetClusterEtcdHealthCheck(ctx, controlPlane, util.ObjectKey(cluster)) //nolint

allMachines, err := r.managementCluster.GetMachinesForCluster(ctx, util.ObjectKey(cluster))
if err != nil {
return ctrl.Result{}, err
@@ -442,21 +476,43 @@ func (r *KubeadmControlPlaneReconciler) ClusterToKubeadmControlPlane(o handler.M
return nil
}

// reconcileHealth performs health checks for control plane components and etcd
// reconcileControlPlaneHealth performs health checks for control plane components and etcd.
// It removes any etcd members that do not have a corresponding node.
// Also, as a final step, it checks whether any machines are being deleted.
func (r *KubeadmControlPlaneReconciler) reconcileHealth(ctx context.Context, cluster *clusterv1.Cluster, kcp *controlplanev1.KubeadmControlPlane, controlPlane *internal.ControlPlane) (ctrl.Result, error) {
func (r *KubeadmControlPlaneReconciler) reconcileControlPlaneHealth(ctx context.Context, cluster *clusterv1.Cluster, kcp *controlplanev1.KubeadmControlPlane, controlPlane *internal.ControlPlane) (ctrl.Result, error) {
logger := r.Log.WithValues("namespace", kcp.Namespace, "kubeadmControlPlane", kcp.Name)

// If there are no KCP-owned control-plane machines, then the control plane has not been initialized yet.
if controlPlane.Machines.Len() == 0 {
return ctrl.Result{}, nil
}

for i := range controlPlane.Machines {
m := controlPlane.Machines[i]
// Initialize the patch helper.
patchHelper, err := patch.NewHelper(m, r.Client)
if err != nil {
return ctrl.Result{}, err
}

defer func() {
// Always attempt to Patch the Machine conditions after each health reconciliation.
if err := patchHelper.Patch(ctx, m); err != nil {
Reviewer: I guess this is because we are delegating the responsibility to update Machine conditions to reconcileControlPlaneHealth. Have you considered these use cases? A possible idea to address this is to assign the responsibility to update Machines to updateStatus?

Author: Machine remediation will call It doesn't cover delete workflow because we don't call

Reviewer: I'm not sure we will call reconcileControlPlaneHealth during remediation, because remediation should happen before we are checking reconcileControlPlaneHealth (also, currently we are returning if the cluster is unhealthy). This is why I was suggesting to consider having this code in updateStatus.

Author: Added reconcileHealth to reconcile delete with the minimum changes possible.

Author: Had to remove error checks for
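For illustration, a rough sketch of the alternative floated in this thread: hang the Machine condition patching off updateStatus instead of reconcileControlPlaneHealth, so conditions are refreshed on every reconcile regardless of which path returns early. This is not what the PR does; the patchMachineHealthConditions helper name and its placement are assumptions, while the health-check and patch calls are the ones already used in this diff.

// Sketch only: hypothetical helper that updateStatus could call before
// computing KCP-level conditions. Not part of this PR.
func (r *KubeadmControlPlaneReconciler) patchMachineHealthConditions(ctx context.Context, cluster *clusterv1.Cluster, controlPlane *internal.ControlPlane) {
	// Snapshot each Machine before the health checks mutate its conditions,
	// so the patch helpers can compute a meaningful diff afterwards.
	patchHelpers := map[string]*patch.Helper{}
	for i := range controlPlane.Machines {
		m := controlPlane.Machines[i]
		patchHelper, err := patch.NewHelper(m, r.Client)
		if err != nil {
			r.Log.Error(err, "Failed to create patch helper for Machine", "machine", m.Name)
			continue
		}
		patchHelpers[m.Name] = patchHelper
	}

	// The health checks' only job here is to set health-related conditions on
	// the control plane Machines; their results and errors are intentionally ignored.
	r.managementCluster.TargetClusterControlPlaneHealthCheck(ctx, controlPlane, util.ObjectKey(cluster)) //nolint
	r.managementCluster.TargetClusterEtcdHealthCheck(ctx, controlPlane, util.ObjectKey(cluster))         //nolint

	// Persist whatever conditions the health checks set on each Machine.
	for i := range controlPlane.Machines {
		m := controlPlane.Machines[i]
		patchHelper, ok := patchHelpers[m.Name]
		if !ok {
			continue
		}
		if err := patchHelper.Patch(ctx, m); err != nil {
			r.Log.Error(err, "Failed to patch KubeadmControlPlane Machine", "machine", m.Name)
		}
	}
}

The trade-off would be that conditions get refreshed on the delete path and on early returns too, at the cost of doing the workload cluster round trips even when reconcileControlPlaneHealth would have run anyway.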
logger.Error(err, "Failed to patch KubeadmControlPlane Machine", "machine", m.Name)
}
}()
}

// Do a health check of the Control Plane components
if err := r.managementCluster.TargetClusterControlPlaneIsHealthy(ctx, util.ObjectKey(cluster)); err != nil {
if err := r.managementCluster.TargetClusterControlPlaneIsHealthy(ctx, controlPlane, util.ObjectKey(cluster)); err != nil {
r.recorder.Eventf(kcp, corev1.EventTypeWarning, "ControlPlaneUnhealthy",
"Waiting for control plane to pass control plane health check to continue reconciliation: %v", err)
return ctrl.Result{RequeueAfter: healthCheckFailedRequeueAfter}, nil
return ctrl.Result{}, errors.Wrap(err, "failed to pass control-plane health check")
}

// If KCP should manage etcd, ensure etcd is healthy.
if controlPlane.IsEtcdManaged() {
if err := r.managementCluster.TargetClusterEtcdIsHealthy(ctx, util.ObjectKey(cluster)); err != nil {
if err := r.managementCluster.TargetClusterEtcdIsHealthy(ctx, controlPlane, util.ObjectKey(cluster)); err != nil {
errList := []error{errors.Wrap(err, "failed to pass etcd health check")}
r.recorder.Eventf(kcp, corev1.EventTypeWarning, "ControlPlaneUnhealthy",
"Waiting for control plane to pass etcd health check to continue reconciliation: %v", err)