@@ -86,6 +86,8 @@ type KubeadmControlPlaneReconciler struct {
86
86
// WatchFilterValue is the label value used to filter events prior to reconciliation.
87
87
WatchFilterValue string
88
88
89
+ RemoteConditionsGracePeriod time.Duration
90
+
89
91
// Deprecated: DeprecatedInfraMachineNaming. Name the InfraStructureMachines after the InfraMachineTemplate.
90
92
DeprecatedInfraMachineNaming bool
91
93
@@ -95,6 +97,14 @@ type KubeadmControlPlaneReconciler struct {
95
97
}
96
98
97
99
func (r * KubeadmControlPlaneReconciler ) SetupWithManager (ctx context.Context , mgr ctrl.Manager , options controller.Options ) error {
100
+ if r .Client == nil || r .SecretCachingClient == nil || r .ClusterCache == nil ||
101
+ r .EtcdDialTimeout == time .Duration (0 ) || r .EtcdCallTimeout == time .Duration (0 ) ||
102
+ r .RemoteConditionsGracePeriod < 2 * time .Minute {
103
+ return errors .New ("Client, SecretCachingClient and ClusterCache must not be nil and " +
104
+ "EtcdDialTimeout and EtcdCallTimeout must not be 0 and " +
105
+ "RemoteConditionsGracePeriod must not be < 2m" )
106
+ }
107
+
98
108
predicateLog := ctrl .LoggerFrom (ctx ).WithValues ("controller" , "kubeadmcontrolplane" )
99
109
c , err := ctrl .NewControllerManagedBy (mgr ).
100
110
For (& controlplanev1.KubeadmControlPlane {}).
@@ -111,7 +121,8 @@ func (r *KubeadmControlPlaneReconciler) SetupWithManager(ctx context.Context, mg
111
121
),
112
122
),
113
123
).
114
- WatchesRawSource (r .ClusterCache .GetClusterSource ("kubeadmcontrolplane" , r .ClusterToKubeadmControlPlane )).
124
+ WatchesRawSource (r .ClusterCache .GetClusterSource ("kubeadmcontrolplane" , r .ClusterToKubeadmControlPlane ,
125
+ clustercache .WatchForProbeFailure (r .RemoteConditionsGracePeriod ))).
115
126
Build (r )
116
127
if err != nil {
117
128
return errors .Wrap (err , "failed setting up with a controller manager" )
@@ -802,45 +813,169 @@ func (r *KubeadmControlPlaneReconciler) syncMachines(ctx context.Context, contro
802
813
803
814
// reconcileControlPlaneConditions is responsible of reconciling conditions reporting the status of static pods and
804
815
// the status of the etcd cluster.
805
- func (r * KubeadmControlPlaneReconciler ) reconcileControlPlaneConditions (ctx context.Context , controlPlane * internal.ControlPlane ) error {
816
+ func (r * KubeadmControlPlaneReconciler ) reconcileControlPlaneConditions (ctx context.Context , controlPlane * internal.ControlPlane ) ( reterr error ) {
806
817
// If the cluster is not yet initialized, there is no way to connect to the workload cluster and fetch information
807
818
// for updating conditions. Return early.
808
- if ! controlPlane .KCP .Status .Initialized {
819
+ // We additionally check for the Available condition. The Available condition is set at the same time
820
+ // as .status.initialized and is never changed to false again. Below we'll need the transition time of the
821
+ // Available condition to check if the remote conditions grace period is already reached.
822
+ // Note: The Machine controller uses the ControlPlaneInitialized condition on the Cluster instead for
823
+ // the same check. We don't use the ControlPlaneInitialized condition from the Cluster here because KCP
824
+ // Reconcile does (currently) not get triggered from condition changes to the Cluster object.
825
+ controlPlaneInitialized := conditions .Get (controlPlane .KCP , controlplanev1 .AvailableCondition )
826
+ if ! controlPlane .KCP .Status .Initialized ||
827
+ controlPlaneInitialized == nil || controlPlaneInitialized .Status != corev1 .ConditionTrue {
809
828
v1beta2conditions .Set (controlPlane .KCP , metav1.Condition {
810
829
Type : controlplanev1 .KubeadmControlPlaneEtcdClusterHealthyV1Beta2Condition ,
811
830
Status : metav1 .ConditionUnknown ,
812
831
Reason : controlplanev1 .KubeadmControlPlaneEtcdClusterInspectionFailedV1Beta2Reason ,
813
- Message : "Waiting for remote connection " ,
832
+ Message : "Waiting for Cluster control plane to be initialized " ,
814
833
})
815
834
816
835
v1beta2conditions .Set (controlPlane .KCP , metav1.Condition {
817
836
Type : controlplanev1 .KubeadmControlPlaneControlPlaneComponentsHealthyV1Beta2Condition ,
818
837
Status : metav1 .ConditionUnknown ,
819
838
Reason : controlplanev1 .KubeadmControlPlaneControlPlaneComponentsInspectionFailedV1Beta2Reason ,
820
- Message : "Waiting for remote connection " ,
839
+ Message : "Waiting for Cluster control plane to be initialized " ,
821
840
})
822
841
823
842
return nil
824
843
}
825
844
845
+ defer func () {
846
+ // Patch machines with the updated conditions.
847
+ reterr = kerrors .NewAggregate ([]error {reterr , controlPlane .PatchMachines (ctx )})
848
+ }()
849
+
850
+ // Remote conditions grace period is counted from the later of last probe success and control plane initialized.
851
+ lastProbeSuccessTime := r .ClusterCache .GetLastProbeSuccessTimestamp (ctx , client .ObjectKeyFromObject (controlPlane .Cluster ))
852
+ if time .Since (maxTime (lastProbeSuccessTime , controlPlaneInitialized .LastTransitionTime .Time )) > r .RemoteConditionsGracePeriod {
853
+ // Overwrite conditions to ConnectionDown.
854
+ setConditionsToUnknown (setConditionsToUnknownInput {
855
+ ControlPlane : controlPlane ,
856
+ Overwrite : true ,
857
+ EtcdClusterHealthyReason : controlplanev1 .KubeadmControlPlaneEtcdClusterConnectionDownV1Beta2Reason ,
858
+ ControlPlaneComponentsHealthyReason : controlplanev1 .KubeadmControlPlaneControlPlaneComponentsConnectionDownV1Beta2Reason ,
859
+ StaticPodReason : controlplanev1 .KubeadmControlPlaneMachinePodConnectionDownV1Beta2Reason ,
860
+ EtcdMemberHealthyReason : controlplanev1 .KubeadmControlPlaneMachineEtcdMemberConnectionDownV1Beta2Reason ,
861
+ Message : lastProbeSuccessMessage (lastProbeSuccessTime ),
862
+ })
863
+ return errors .Errorf ("connection to the workload cluster is down" )
864
+ }
865
+
826
866
workloadCluster , err := controlPlane .GetWorkloadCluster (ctx )
827
867
if err != nil {
828
- return errors .Wrap (err , "cannot get remote client to workload cluster" )
868
+ if errors .Is (err , clustercache .ErrClusterNotConnected ) {
869
+ // If conditions are not set, set them to ConnectionDown.
870
+ setConditionsToUnknown (setConditionsToUnknownInput {
871
+ ControlPlane : controlPlane ,
872
+ Overwrite : false , // Don't overwrite.
873
+ EtcdClusterHealthyReason : controlplanev1 .KubeadmControlPlaneEtcdClusterConnectionDownV1Beta2Reason ,
874
+ ControlPlaneComponentsHealthyReason : controlplanev1 .KubeadmControlPlaneControlPlaneComponentsConnectionDownV1Beta2Reason ,
875
+ StaticPodReason : controlplanev1 .KubeadmControlPlaneMachinePodConnectionDownV1Beta2Reason ,
876
+ EtcdMemberHealthyReason : controlplanev1 .KubeadmControlPlaneMachineEtcdMemberConnectionDownV1Beta2Reason ,
877
+ Message : lastProbeSuccessMessage (lastProbeSuccessTime ),
878
+ })
879
+ return errors .Wrap (err , "cannot get client for the workload cluster" )
880
+ }
881
+
882
+ // Overwrite conditions to InspectionFailed.
883
+ setConditionsToUnknown (setConditionsToUnknownInput {
884
+ ControlPlane : controlPlane ,
885
+ Overwrite : true ,
886
+ EtcdClusterHealthyReason : controlplanev1 .KubeadmControlPlaneEtcdClusterInspectionFailedV1Beta2Reason ,
887
+ ControlPlaneComponentsHealthyReason : controlplanev1 .KubeadmControlPlaneControlPlaneComponentsInspectionFailedV1Beta2Reason ,
888
+ StaticPodReason : controlplanev1 .KubeadmControlPlaneMachinePodInspectionFailedV1Beta2Reason ,
889
+ EtcdMemberHealthyReason : controlplanev1 .KubeadmControlPlaneMachineEtcdMemberInspectionFailedV1Beta2Reason ,
890
+ Message : "Please check controller logs for errors" ,
891
+ })
892
+ return errors .Wrap (err , "cannot get client for the workload cluster" )
829
893
}
830
894
831
895
// Update conditions status
832
896
workloadCluster .UpdateStaticPodConditions (ctx , controlPlane )
833
897
workloadCluster .UpdateEtcdConditions (ctx , controlPlane )
834
898
835
- // Patch machines with the updated conditions.
836
- if err := controlPlane .PatchMachines (ctx ); err != nil {
837
- return err
838
- }
839
-
840
899
// KCP will be patched at the end of Reconcile to reflect updated conditions, so we can return now.
841
900
return nil
842
901
}
843
902
903
+ type setConditionsToUnknownInput struct {
904
+ ControlPlane * internal.ControlPlane
905
+ Overwrite bool
906
+ EtcdClusterHealthyReason string
907
+ ControlPlaneComponentsHealthyReason string
908
+ StaticPodReason string
909
+ EtcdMemberHealthyReason string
910
+ Message string
911
+ }
912
+
913
+ func setConditionsToUnknown (input setConditionsToUnknownInput ) {
914
+ etcdClusterHealthySet := v1beta2conditions .Has (input .ControlPlane .KCP , controlplanev1 .KubeadmControlPlaneEtcdClusterHealthyV1Beta2Condition )
915
+ controlPlaneComponentsHealthySet := v1beta2conditions .Has (input .ControlPlane .KCP , controlplanev1 .KubeadmControlPlaneControlPlaneComponentsHealthyV1Beta2Condition )
916
+
917
+ if input .Overwrite || ! etcdClusterHealthySet {
918
+ v1beta2conditions .Set (input .ControlPlane .KCP , metav1.Condition {
919
+ Type : controlplanev1 .KubeadmControlPlaneEtcdClusterHealthyV1Beta2Condition ,
920
+ Status : metav1 .ConditionUnknown ,
921
+ Reason : input .EtcdClusterHealthyReason ,
922
+ Message : input .Message ,
923
+ })
924
+ for _ , machine := range input .ControlPlane .Machines {
925
+ if input .ControlPlane .IsEtcdManaged () {
926
+ v1beta2conditions .Set (machine , metav1.Condition {
927
+ Type : controlplanev1 .KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Condition ,
928
+ Status : metav1 .ConditionUnknown ,
929
+ Reason : input .EtcdMemberHealthyReason ,
930
+ Message : input .Message ,
931
+ })
932
+ }
933
+ }
934
+ }
935
+
936
+ if input .Overwrite || ! controlPlaneComponentsHealthySet {
937
+ v1beta2conditions .Set (input .ControlPlane .KCP , metav1.Condition {
938
+ Type : controlplanev1 .KubeadmControlPlaneControlPlaneComponentsHealthyV1Beta2Condition ,
939
+ Status : metav1 .ConditionUnknown ,
940
+ Reason : input .ControlPlaneComponentsHealthyReason ,
941
+ Message : input .Message ,
942
+ })
943
+
944
+ allMachinePodV1beta2Conditions := []string {
945
+ controlplanev1 .KubeadmControlPlaneMachineAPIServerPodHealthyV1Beta2Condition ,
946
+ controlplanev1 .KubeadmControlPlaneMachineControllerManagerPodHealthyV1Beta2Condition ,
947
+ controlplanev1 .KubeadmControlPlaneMachineSchedulerPodHealthyV1Beta2Condition ,
948
+ }
949
+ if input .ControlPlane .IsEtcdManaged () {
950
+ allMachinePodV1beta2Conditions = append (allMachinePodV1beta2Conditions , controlplanev1 .KubeadmControlPlaneMachineEtcdPodHealthyV1Beta2Condition )
951
+ }
952
+ for _ , machine := range input .ControlPlane .Machines {
953
+ for _ , condition := range allMachinePodV1beta2Conditions {
954
+ v1beta2conditions .Set (machine , metav1.Condition {
955
+ Type : condition ,
956
+ Status : metav1 .ConditionUnknown ,
957
+ Reason : input .StaticPodReason ,
958
+ Message : input .Message ,
959
+ })
960
+ }
961
+ }
962
+ }
963
+ }
964
+
965
// lastProbeSuccessMessage renders a human readable message with the time of the last successful probe,
// formatted as RFC 3339. It returns an empty string when lastProbeSuccessTime is the zero time.
func lastProbeSuccessMessage(lastProbeSuccessTime time.Time) string {
	msg := ""
	if !lastProbeSuccessTime.IsZero() {
		msg = fmt.Sprintf("Last successful probe at %s", lastProbeSuccessTime.Format(time.RFC3339))
	}
	return msg
}
971
+
972
// maxTime returns the later of t1 and t2.
// When neither is strictly later than the other, t2 is returned.
func maxTime(t1, t2 time.Time) time.Time {
	if t2.Before(t1) {
		return t1
	}
	return t2
}
978
+
844
979
// reconcileEtcdMembers ensures the number of etcd members is in sync with the number of machines/nodes.
845
980
// This is usually required after a machine deletion.
846
981
//
0 commit comments