Skip to content

Commit 5fc0354

Browse files
committed
pkg/operator/status: Drop kubelet skew guard, add RHEL guard
The kubelet skew guards are from 1471d2c (Bug 1986453: Check for API server and node versions skew, 2021-07-27, #2658). But the Kube API server also landed similar guards in openshift/cluster-kube-apiserver-operator@9ce4f74775 (add KubeletVersionSkewController, 2021-08-26, openshift/cluster-kube-apiserver-operator#1199). openshift/enhancements@0ba744e750 (eus-upgrades-mvp: don't enforce skew check in MCO, 2021-04-29, openshift/enhancements#762) had shifted the proposal from MCO-guards to KAS-guards, so I'm not entirely clear on why the MCO guards landed at all. But it's convenient for me that they did, because while I'm dropping them here, I'm recycling the Node lister for a new check. 4.19 is dropping bare-RHEL support, and I want the Node lister to look for RHEL entries like: "osImage: Red Hat Enterprise Linux 8.6 (Ootpa)", but we are ok with RHCOS entries like: "osImage: Red Hat Enterprise Linux CoreOS 419.96.202503032242-0".
1 parent 28740b6 commit 5fc0354

File tree

1 file changed

+30
-104
lines changed

1 file changed

+30
-104
lines changed

pkg/operator/status.go

+30-104
Original file line numberDiff line numberDiff line change
@@ -254,13 +254,6 @@ func (optr *Operator) syncDegradedStatus(co *configv1.ClusterOperator, ierr sync
254254
cov1helpers.SetStatusCondition(&co.Status.Conditions, coDegradedCondition)
255255
}
256256

257-
const (
258-
skewUnchecked = "KubeletSkewUnchecked"
259-
skewSupported = "KubeletSkewSupported"
260-
skewUnsupported = "KubeletSkewUnsupported"
261-
skewPresent = "KubeletSkewPresent"
262-
)
263-
264257
// syncUpgradeableStatus applies the new condition to the mco's ClusterOperator object.
265258
func (optr *Operator) syncUpgradeableStatus(co *configv1.ClusterOperator) error {
266259

@@ -321,37 +314,20 @@ func (optr *Operator) syncUpgradeableStatus(co *configv1.ClusterOperator) error
321314
coStatusCondition.Message = "One or more machine config pools are updating, please see `oc get mcp` for further details"
322315
}
323316

324-
// don't overwrite status if updating or degraded
325-
if !updating && !degraded && !interrupted {
326-
skewStatus, status, err := optr.isKubeletSkewSupported(pools)
317+
// don't overwrite status if already grumpy
318+
if coStatusCondition.Status == configv1.ConditionTrue {
319+
condition, err := optr.checkNodeUpgradeable(pools)
327320
if err != nil {
328-
klog.Errorf("Error checking version skew: %v, kubelet skew status: %v, status reason: %v, status message: %v", err, skewStatus, status.Reason, status.Message)
329-
coStatusCondition.Reason = status.Reason
330-
coStatusCondition.Message = status.Message
331-
cov1helpers.SetStatusCondition(&co.Status.Conditions, coStatusCondition)
332-
}
333-
switch skewStatus {
334-
case skewUnchecked:
335-
coStatusCondition.Reason = status.Reason
336-
coStatusCondition.Message = status.Message
337-
cov1helpers.SetStatusCondition(&co.Status.Conditions, coStatusCondition)
338-
case skewUnsupported:
339-
coStatusCondition.Reason = status.Reason
340-
coStatusCondition.Message = status.Message
341-
mcoObjectRef := &corev1.ObjectReference{
342-
Kind: co.Kind,
343-
Name: co.Name,
344-
Namespace: co.Namespace,
345-
UID: co.GetUID(),
346-
}
347-
klog.Infof("kubelet skew status: %v, status reason: %v", skewStatus, status.Reason)
348-
optr.eventRecorder.Eventf(mcoObjectRef, corev1.EventTypeWarning, coStatusCondition.Reason, coStatusCondition.Message)
349-
cov1helpers.SetStatusCondition(&co.Status.Conditions, coStatusCondition)
350-
case skewPresent:
351-
coStatusCondition.Reason = status.Reason
352-
coStatusCondition.Message = status.Message
353-
klog.Infof("kubelet skew status: %v, status reason: %v", skewStatus, status.Reason)
321+
msg := fmt.Sprintf("Error checking Nodes for Upgradeable status: %v", err)
322+
klog.Error(msg)
323+
coStatusCondition.Status = configv1.ConditionUnknown
324+
coStatusCondition.Reason = condition.Reason
325+
coStatusCondition.Message = condition.Message
354326
cov1helpers.SetStatusCondition(&co.Status.Conditions, coStatusCondition)
327+
} else if condition.Status != configv1.ConditionTrue {
328+
coStatusCondition.Status = condition.Status
329+
coStatusCondition.Reason = condition.Reason
330+
coStatusCondition.Message = condition.Message
355331
}
356332
}
357333
cov1helpers.SetStatusCondition(&co.Status.Conditions, coStatusCondition)
@@ -525,83 +501,33 @@ func (optr *Operator) cfeEvalCgroupsV1() (bool, error) {
525501
return nodeClusterConfig.Spec.CgroupMode == configv1.CgroupModeV1, nil
526502
}
527503

528-
// isKubeletSkewSupported checks the version skew of kube-apiserver and node kubelet version.
529-
// Returns the skew status. version skew > 2 is not supported.
530-
func (optr *Operator) isKubeletSkewSupported(pools []*mcfgv1.MachineConfigPool) (skewStatus string, coStatus configv1.ClusterOperatorStatusCondition, err error) {
531-
coStatus = configv1.ClusterOperatorStatusCondition{}
532-
kubeAPIServerStatus, err := optr.clusterOperatorLister.Get("kube-apiserver")
533-
if err != nil {
534-
coStatus.Reason = skewUnchecked
535-
coStatus.Message = fmt.Sprintf("An error occurred when checking kubelet version skew: %v", err)
536-
return skewUnchecked, coStatus, err
537-
}
538-
// looks like
539-
// - name: kube-apiserver
540-
// version: 1.21.0-rc.0
541-
kubeAPIServerVersion := ""
542-
for _, version := range kubeAPIServerStatus.Status.Versions {
543-
if version.Name != "kube-apiserver" {
544-
continue
545-
}
546-
kubeAPIServerVersion = version.Version
547-
break
548-
}
549-
if kubeAPIServerVersion == "" {
550-
err = fmt.Errorf("kube-apiserver does not yet have a version")
551-
coStatus.Reason = skewUnchecked
552-
coStatus.Message = fmt.Sprintf("An error occurred when checking kubelet version skew: %v", err.Error())
553-
return skewUnchecked, coStatus, err
554-
}
555-
kubeAPIServerMinorVersion, err := getMinorKubeletVersion(kubeAPIServerVersion)
556-
if err != nil {
557-
coStatus.Reason = skewUnchecked
558-
coStatus.Message = fmt.Sprintf("An error occurred when checking kubelet version skew: %v", err)
559-
return skewUnchecked, coStatus, err
504+
// checkNodeUpgradeable checks current Node status to look for anything incompatible with the next 4.(y+1) OpenShift release.
505+
func (optr *Operator) checkNodeUpgradeable(pools []*mcfgv1.MachineConfigPool) (coStatus configv1.ClusterOperatorStatusCondition, err error) {
506+
coStatus = configv1.ClusterOperatorStatusCondition{
507+
Status: configv1.ConditionTrue,
560508
}
561-
var (
562-
lastError error
563-
kubeletVersion string
564-
)
565509
nodes, err := optr.GetAllManagedNodes(pools)
566510
if err != nil {
567511
err = fmt.Errorf("getting all managed nodes failed: %w", err)
568-
coStatus.Reason = skewUnchecked
569-
coStatus.Message = fmt.Sprintf("An error occurred when getting all the managed nodes: %v", err.Error())
512+
coStatus.Status = configv1.ConditionUnknown
513+
coStatus.Reason = "FailedToGetNodes"
514+
coStatus.Message = err.Error()
515+
return coStatus, err
570516
}
517+
rhelNodes := make([]string, 0, len(nodes))
571518
for _, node := range nodes {
572-
// looks like kubeletVersion: v1.21.0-rc.0+6143dea
573-
kubeletVersion = node.Status.NodeInfo.KubeletVersion
574-
if kubeletVersion == "" {
575-
continue
519+
osImage := node.Status.NodeInfo.OSImage
520+
if strings.HasPrefix(osImage, "Red Hat Enterprise Linux") && !strings.HasPrefix(osImage, "Red Hat Enterprise Linux CoreOS") {
521+
rhelNodes = append(rhelNodes, fmt.Sprintf("%s (%s)", node.Name, osImage))
576522
}
577-
nodeMinorVersion, err := getMinorKubeletVersion(kubeletVersion)
578-
if err != nil {
579-
lastError = err
580-
continue
581-
}
582-
if nodeMinorVersion+2 < kubeAPIServerMinorVersion {
583-
coStatus.Reason = skewUnsupported
584-
coStatus.Message = fmt.Sprintf("One or more nodes have an unsupported kubelet version skew. Please see `oc get nodes` for details and upgrade all nodes so that they have a kubelet version of at least %v.", getMinimalSkewSupportNodeVersion(kubeAPIServerVersion))
585-
return skewUnsupported, coStatus, nil
586-
}
587-
if nodeMinorVersion+2 == kubeAPIServerMinorVersion {
588-
coStatus.Reason = skewPresent
589-
coStatus.Message = fmt.Sprintf("Current kubelet version %v will not be supported by newer kube-apiserver. Please upgrade the kubelet first if plan to upgrade the kube-apiserver", kubeletVersion)
590-
return skewPresent, coStatus, nil
591-
}
592-
}
593-
if kubeletVersion == "" {
594-
err = fmt.Errorf("kubelet does not yet have a version")
595-
coStatus.Reason = skewUnchecked
596-
coStatus.Message = fmt.Sprintf("An error occurred when checking kubelet version skew: %v", err.Error())
597-
return skewUnchecked, coStatus, err
598523
}
599-
if lastError != nil {
600-
coStatus.Reason = skewUnchecked
601-
coStatus.Message = fmt.Sprintf("An error occurred when checking kubelet version skew: %v", err)
602-
return skewUnchecked, coStatus, lastError
524+
sort.Strings(rhelNodes)
525+
if len(rhelNodes) > 0 {
526+
coStatus.Status = configv1.ConditionFalse
527+
coStatus.Reason = "RHELNodes"
528+
coStatus.Message = fmt.Sprintf("%s RHEL nodes, including %s, but OpenShift 4.19 requires RHCOS https://FIXME-DOC-LINK", len(rhelNodes), rhelNodes[0])
603529
}
604-
return skewSupported, coStatus, nil
530+
return coStatus, nil
605531
}
606532

607533
// GetAllManagedNodes returns the nodes managed by MCO

0 commit comments

Comments
 (0)