-
Notifications
You must be signed in to change notification settings - Fork 424
OCPNODE-3029: WIP: handle required minimum kubelet version featuregate rollout #4929
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -42,6 +42,7 @@ import ( | |
mcfglistersv1 "github.com/openshift/client-go/machineconfiguration/listers/machineconfiguration/v1" | ||
"github.com/openshift/library-go/pkg/operator/configobserver/featuregates" | ||
"github.com/openshift/machine-config-operator/pkg/apihelpers" | ||
"github.com/openshift/machine-config-operator/pkg/constants" | ||
ctrlcommon "github.com/openshift/machine-config-operator/pkg/controller/common" | ||
mtmpl "github.com/openshift/machine-config-operator/pkg/controller/template" | ||
"github.com/openshift/machine-config-operator/pkg/version" | ||
|
@@ -420,7 +421,7 @@ func (ctrl *Controller) handleFeatureErr(err error, key string) { | |
|
||
// generateOriginalKubeletConfigWithFeatureGates generates a KubeletConfig and ensure the correct feature gates are set | ||
// based on the given FeatureGate. | ||
func generateOriginalKubeletConfigWithFeatureGates(cc *mcfgv1.ControllerConfig, templatesDir, role string, featureGateAccess featuregates.FeatureGateAccess, apiServer *configv1.APIServer) (*kubeletconfigv1beta1.KubeletConfiguration, error) { | ||
func generateOriginalKubeletConfigWithFeatureGates(cc *mcfgv1.ControllerConfig, templatesDir, role string, featureGates map[string]bool, apiServer *configv1.APIServer) (*kubeletconfigv1beta1.KubeletConfiguration, error) { | ||
originalKubeletIgn, err := generateOriginalKubeletConfigIgn(cc, templatesDir, role, apiServer) | ||
if err != nil { | ||
return nil, fmt.Errorf("could not generate the original Kubelet config ignition: %w", err) | ||
|
@@ -437,11 +438,6 @@ func generateOriginalKubeletConfigWithFeatureGates(cc *mcfgv1.ControllerConfig, | |
return nil, fmt.Errorf("could not deserialize the Kubelet source: %w", err) | ||
} | ||
|
||
featureGates, err := generateFeatureMap(featureGateAccess, openshiftOnlyFeatureGates...) | ||
if err != nil { | ||
return nil, fmt.Errorf("could not generate features map: %w", err) | ||
} | ||
|
||
// Merge in Feature Gates. | ||
// If they are the same, this will be a no-op | ||
if err := mergo.Merge(&originalKubeConfig.FeatureGates, featureGates, mergo.WithOverride); err != nil { | ||
|
@@ -533,7 +529,7 @@ func (ctrl *Controller) addAnnotation(cfg *mcfgv1.KubeletConfig, annotationKey, | |
// This function is not meant to be invoked concurrently with the same key. | ||
// | ||
//nolint:gocyclo | ||
func (ctrl *Controller) syncKubeletConfig(key string) error { | ||
func (ctrl *Controller) syncKubeletConfig(key string) (retErr error) { | ||
startTime := time.Now() | ||
klog.V(4).Infof("Started syncing kubeletconfig %q (%v)", key, startTime) | ||
defer func() { | ||
|
@@ -600,6 +596,13 @@ func (ctrl *Controller) syncKubeletConfig(key string) error { | |
return ctrl.syncStatusOnly(cfg, err, "could not get the TLSSecurityProfile from %v: %v", ctrlcommon.APIServerInstanceName, err) | ||
} | ||
|
||
updatedPools := map[string]int64{} | ||
|
||
featureGates, renderedVersions, err := generateFeatureMap(ctrl.featureGateAccess, openshiftOnlyFeatureGates...) | ||
if err != nil { | ||
return fmt.Errorf("could not generate features map: %w", err) | ||
} | ||
|
||
for _, pool := range mcpPools { | ||
if pool.Spec.Configuration.Name == "" { | ||
updateDelay := 5 * time.Second | ||
|
@@ -634,7 +637,7 @@ func (ctrl *Controller) syncKubeletConfig(key string) error { | |
return fmt.Errorf("could not get ControllerConfig %w", err) | ||
} | ||
|
||
originalKubeConfig, err := generateOriginalKubeletConfigWithFeatureGates(cc, ctrl.templatesDir, role, ctrl.featureGateAccess, apiServer) | ||
originalKubeConfig, err := generateOriginalKubeletConfigWithFeatureGates(cc, ctrl.templatesDir, role, featureGates, apiServer) | ||
if err != nil { | ||
return ctrl.syncStatusOnly(cfg, err, "could not get original kubelet config: %v", err) | ||
} | ||
|
@@ -725,13 +728,73 @@ func (ctrl *Controller) syncKubeletConfig(key string) error { | |
} | ||
klog.Infof("Applied KubeletConfig %v on MachineConfigPool %v", key, pool.Name) | ||
ctrlcommon.UpdateStateMetric(ctrlcommon.MCCSubControllerState, "machine-config-controller-kubelet-config", "Sync Kubelet Config", pool.Name) | ||
updatedPools[pool.Name] = pool.Status.ObservedGeneration | ||
} | ||
go ctrl.writebackMinimumKubeletVersionIfAppropriate(updatedPools, renderedVersions, nodeConfig, func() ([]*mcfgv1.MachineConfigPool, error) { | ||
return ctrl.getPoolsForKubeletConfig(cfg) | ||
}) | ||
if err := ctrl.cleanUpDuplicatedMC(managedKubeletConfigKeyPrefix); err != nil { | ||
return err | ||
} | ||
return ctrl.syncStatusOnly(cfg, nil) | ||
} | ||
|
||
func (ctrl *Controller) writebackMinimumKubeletVersionIfAppropriate(updatedPools map[string]int64, renderedVersions []configv1.MinimumComponentVersion, node *osev1.Node, poolGetter func() ([]*mcfgv1.MachineConfigPool, error)) { | ||
renderedKubeletVersion := "" | ||
for _, cv := range renderedVersions { | ||
if cv.Component == configv1.MinimumComponentKubelet { | ||
renderedKubeletVersion = cv.Version | ||
} | ||
} | ||
if node.Spec.MinimumKubeletVersion == node.Status.MinimumKubeletVersion && | ||
node.Status.MinimumKubeletVersion == renderedKubeletVersion { | ||
klog.InfoS("Skipping writeback to nodes.config.Status.MinimumKubeletVersion because situation not correct", | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. question: why are these conditions not correct? isn't this first one just saying that the spec and status match what we expect? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. if the status matches the spec, no need to do the update (though I probably should actually check renderedKubeletVersion there too) |
||
"nodes.config.Spec.MinimumKubeletVerison", node.Spec.MinimumKubeletVersion, | ||
"nodes.config.Status.MinimumKubeletVerison", node.Status.MinimumKubeletVersion, | ||
"renderedKubeletVersion", renderedKubeletVersion) | ||
return | ||
} | ||
|
||
// This featuregate rollout was done as a result of a new minimum kubelet version rolling out, which means we need to wait for at least one | ||
// node in each MCP to finish updating before we set the spec. | ||
if err := wait.ExponentialBackoff(constants.NodeUpdateBackoff, func() (bool, error) { | ||
mcps, err := poolGetter() | ||
if err != nil { | ||
return true, err | ||
} | ||
allUpdated := true | ||
for _, mcp := range mcps { | ||
if oldGeneration, ok := updatedPools[mcp.Name]; ok && (mcp.Status.UpdatedMachineCount == 0 && mcp.Status.ObservedGeneration > oldGeneration) { | ||
allUpdated = false | ||
} | ||
} | ||
return allUpdated, nil | ||
}); err != nil { | ||
klog.Errorf("Failed to update rendered kubelet version: %v", err) | ||
} | ||
|
||
if err := retry.RetryOnConflict(updateBackoff, func() error { | ||
// Fetch the NodeConfig | ||
nodeConfig, err := ctrl.nodeConfigLister.Get(ctrlcommon.ClusterNodeInstanceName) | ||
if macherrors.IsNotFound(err) { | ||
nodeConfig = createNewDefaultNodeconfig() | ||
} | ||
if nodeConfig.Spec.MinimumKubeletVersion != renderedKubeletVersion { | ||
// skip this update, as we no longer want this spec value | ||
return nil | ||
} | ||
if nodeConfig.Status.MinimumKubeletVersion == renderedKubeletVersion { | ||
// this happened somewhere else, skip | ||
return nil | ||
} | ||
nodeConfig.Status.MinimumKubeletVersion = renderedKubeletVersion | ||
_, err = ctrl.configClient.ConfigV1().Nodes().Update(context.TODO(), node, metav1.UpdateOptions{}) | ||
return err | ||
}); err != nil { | ||
klog.Errorf("Failed to update rendered kubelet version to node status: %v", err) | ||
} | ||
} | ||
|
||
// cleanUpDuplicatedMC removes the MC of non-updated GeneratedByControllerVersionKey if its name contains 'generated-kubelet'. | ||
// BZ 1955517: upgrade when there are more than one configs, the duplicated and upgraded MC will be generated (func getManagedKubeletConfigKey()) | ||
// MC with old GeneratedByControllerVersionKey fails the upgrade. | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
any particular reason this is a separate goroutine?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I thought it could be async in case it takes a while to have the mcps rollback and I didn't know what could be blocked by having it sync
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think a better solution might be to push this onto one of the work queues by doing something like this:
We'll probably want to use a
sync.Map
since the work queue has multiple workers that could mutate the controller struct state at any given time.