diff --git a/cmd/machine-config-controller/start.go b/cmd/machine-config-controller/start.go
index ad3be8d1ab..bbd5a8ea78 100644
--- a/cmd/machine-config-controller/start.go
+++ b/cmd/machine-config-controller/start.go
@@ -79,6 +79,7 @@ func runStartCmd(_ *cobra.Command, _ []string) {
 	draincontroller := drain.New(
 		drain.DefaultConfig(),
 		ctrlctx.KubeInformerFactory.Core().V1().Nodes(),
+		ctrlctx.InformerFactory.Machineconfiguration().V1().MachineConfigPools(),
 		ctrlctx.ClientBuilder.KubeClientOrDie("node-update-controller"),
 		ctrlctx.ClientBuilder.MachineConfigClientOrDie("node-update-controller"),
 		ctrlctx.FeatureGateAccess,
diff --git a/cmd/machine-config-daemon/start.go b/cmd/machine-config-daemon/start.go
index fb3585cc0e..c3c437d37e 100644
--- a/cmd/machine-config-daemon/start.go
+++ b/cmd/machine-config-daemon/start.go
@@ -186,6 +186,7 @@ func runStartCmd(_ *cobra.Command, _ []string) {
 		ctrlctx.InformerFactory.Machineconfiguration().V1().MachineConfigs(),
 		ctrlctx.KubeInformerFactory.Core().V1().Nodes(),
 		ctrlctx.InformerFactory.Machineconfiguration().V1().ControllerConfigs(),
+		ctrlctx.InformerFactory.Machineconfiguration().V1().MachineConfigPools(),
 		ctrlctx.ClientBuilder.OperatorClientOrDie(componentName),
 		startOpts.kubeletHealthzEnabled,
 		startOpts.kubeletHealthzEndpoint,
diff --git a/pkg/controller/drain/drain_controller.go b/pkg/controller/drain/drain_controller.go
index ab699ced05..8566e55c3e 100644
--- a/pkg/controller/drain/drain_controller.go
+++ b/pkg/controller/drain/drain_controller.go
@@ -13,6 +13,7 @@ import (
 	"github.com/openshift/library-go/pkg/operator/configobserver/featuregates"
 	ctrlcommon "github.com/openshift/machine-config-operator/pkg/controller/common"
 	daemonconsts "github.com/openshift/machine-config-operator/pkg/daemon/constants"
+	"github.com/openshift/machine-config-operator/pkg/helpers"
 	"github.com/openshift/machine-config-operator/pkg/upgrademonitor"
 	apierrors "k8s.io/apimachinery/pkg/api/errors"
@@ -23,6 +24,8 @@ import (
 	"k8s.io/apimachinery/pkg/util/strategicpatch"
 	"k8s.io/apimachinery/pkg/util/wait"

+	mcfginformersv1 "github.com/openshift/client-go/machineconfiguration/informers/externalversions/machineconfiguration/v1"
+	mcfglistersv1 "github.com/openshift/client-go/machineconfiguration/listers/machineconfiguration/v1"
 	corev1 "k8s.io/api/core/v1"
 	coreinformersv1 "k8s.io/client-go/informers/core/v1"
 	clientset "k8s.io/client-go/kubernetes"
@@ -98,6 +101,9 @@ type Controller struct {
 	nodeLister       corelisterv1.NodeLister
 	nodeListerSynced cache.InformerSynced

+	mcpLister       mcfglistersv1.MachineConfigPoolLister
+	mcpListerSynced cache.InformerSynced
+
 	queue workqueue.TypedRateLimitingInterface[string]

 	ongoingDrains map[string]time.Time
@@ -110,6 +116,7 @@ type Controller struct {
 func New(
 	cfg Config,
 	nodeInformer coreinformersv1.NodeInformer,
+	mcpInformer mcfginformersv1.MachineConfigPoolInformer,
 	kubeClient clientset.Interface,
 	mcfgClient mcfgclientset.Interface,
 	fgAccessor featuregates.FeatureGateAccess,
@@ -140,6 +147,9 @@ func New(
 	ctrl.nodeLister = nodeInformer.Lister()
 	ctrl.nodeListerSynced = nodeInformer.Informer().HasSynced

+	ctrl.mcpLister = mcpInformer.Lister()
+	ctrl.mcpListerSynced = mcpInformer.Informer().HasSynced
+
 	return ctrl
 }
@@ -159,7 +169,7 @@ func (ctrl *Controller) Run(workers int, stopCh <-chan struct{}) {
 	defer utilruntime.HandleCrash()
 	defer ctrl.queue.ShutDown()

-	if !cache.WaitForCacheSync(stopCh, ctrl.nodeListerSynced) {
+	if !cache.WaitForCacheSync(stopCh, ctrl.nodeListerSynced, ctrl.mcpListerSynced) {
 		return
 	}
@@ -305,6 +315,12 @@ func (ctrl *Controller) syncNode(key string) error {
 		Ctx: context.TODO(),
 	}

+	// Get MCP associated with node
+	pool, err := helpers.GetPrimaryPoolNameForMCN(ctrl.mcpLister, node)
+	if err != nil {
+		return err
+	}
+
 	desiredVerb := strings.Split(desiredState, "-")[0]
 	switch desiredVerb {
 	case daemonconsts.DrainerStateUncordon:
@@ -318,6 +334,7 @@ func (ctrl *Controller) syncNode(key string) error {
 			node,
 			ctrl.client,
 			ctrl.featureGatesAccessor,
+			pool,
 		)
 		if nErr != nil {
 			klog.Errorf("Error making MCN for Uncordon failure: %v", err)
@@ -333,6 +350,7 @@ func (ctrl *Controller) syncNode(key string) error {
 			node,
 			ctrl.client,
 			ctrl.featureGatesAccessor,
+			pool,
 		)
 		if err != nil {
 			klog.Errorf("Error making MCN for UnCordon success: %v", err)
@@ -387,6 +405,12 @@ func (ctrl *Controller) drainNode(node *corev1.Node, drainer *drain.Helper) erro
 		break
 	}

+	// Get MCP associated with node
+	pool, err := helpers.GetPrimaryPoolNameForMCN(ctrl.mcpLister, node)
+	if err != nil {
+		return err
+	}
+
 	if !isOngoingDrain {
 		ctrl.logNode(node, "cordoning")
 		// perform cordon
@@ -398,6 +422,7 @@ func (ctrl *Controller) drainNode(node *corev1.Node, drainer *drain.Helper) erro
 			node,
 			ctrl.client,
 			ctrl.featureGatesAccessor,
+			pool,
 		)
 		if Nerr != nil {
 			klog.Errorf("Error making MCN for Cordon Failure: %v", Nerr)
@@ -412,6 +437,7 @@ func (ctrl *Controller) drainNode(node *corev1.Node, drainer *drain.Helper) erro
 			node,
 			ctrl.client,
 			ctrl.featureGatesAccessor,
+			pool,
 		)
 		if err != nil {
 			klog.Errorf("Error making MCN for Cordon Success: %v", err)
@@ -420,7 +446,7 @@ func (ctrl *Controller) drainNode(node *corev1.Node, drainer *drain.Helper) erro
 	// Attempt drain
 	ctrl.logNode(node, "initiating drain")
-	err := upgrademonitor.GenerateAndApplyMachineConfigNodes(
+	err = upgrademonitor.GenerateAndApplyMachineConfigNodes(
 		&upgrademonitor.Condition{State: v1alpha1.MachineConfigNodeUpdateExecuted, Reason: string(v1alpha1.MachineConfigNodeUpdateDrained), Message: "Draining Node as part of update executed phase"},
 		&upgrademonitor.Condition{State: v1alpha1.MachineConfigNodeUpdateDrained, Reason: fmt.Sprintf("%s%s", string(v1alpha1.MachineConfigNodeUpdateExecuted), string(v1alpha1.MachineConfigNodeUpdateDrained)), Message: fmt.Sprintf("Draining node. The drain will not be complete until desired drainer %s matches current drainer %s", node.Annotations[daemonconsts.DesiredDrainerAnnotationKey], node.Annotations[daemonconsts.LastAppliedDrainerAnnotationKey])},
 		metav1.ConditionUnknown,
@@ -428,6 +454,7 @@ func (ctrl *Controller) drainNode(node *corev1.Node, drainer *drain.Helper) erro
 		node,
 		ctrl.client,
 		ctrl.featureGatesAccessor,
+		pool,
 	)
 	if err != nil {
 		klog.Errorf("Error making MCN for Drain beginning: %v", err)
@@ -457,6 +484,7 @@ func (ctrl *Controller) drainNode(node *corev1.Node, drainer *drain.Helper) erro
 		node,
 		ctrl.client,
 		ctrl.featureGatesAccessor,
+		pool,
 	)
 	if nErr != nil {
 		klog.Errorf("Error making MCN for Drain failure: %v", nErr)
@@ -473,6 +501,7 @@ func (ctrl *Controller) drainNode(node *corev1.Node, drainer *drain.Helper) erro
 		node,
 		ctrl.client,
 		ctrl.featureGatesAccessor,
+		pool,
 	)
 	if err != nil {
 		klog.Errorf("Error making MCN for Drain success: %v", err)
diff --git a/pkg/daemon/daemon.go b/pkg/daemon/daemon.go
index ce59597a53..fddb174e96 100644
--- a/pkg/daemon/daemon.go
+++ b/pkg/daemon/daemon.go
@@ -51,9 +51,9 @@ import (
 	mcoResourceRead "github.com/openshift/machine-config-operator/lib/resourceread"
 	ctrlcommon "github.com/openshift/machine-config-operator/pkg/controller/common"
 	"github.com/openshift/machine-config-operator/pkg/daemon/constants"
-	"github.com/openshift/machine-config-operator/pkg/upgrademonitor"
-
 	"github.com/openshift/machine-config-operator/pkg/daemon/osrelease"
+	"github.com/openshift/machine-config-operator/pkg/helpers"
+	"github.com/openshift/machine-config-operator/pkg/upgrademonitor"
 )

 // Daemon is the dispatch point for the functions of the agent on the
@@ -100,6 +100,9 @@ type Daemon struct {
 	mcLister       mcfglistersv1.MachineConfigLister
 	mcListerSynced cache.InformerSynced

+	mcpLister       mcfglistersv1.MachineConfigPoolLister
+	mcpListerSynced cache.InformerSynced
+
 	ccLister       mcfglistersv1.ControllerConfigLister
 	ccListerSynced cache.InformerSynced
@@ -362,6 +365,7 @@ func (dn *Daemon) ClusterConnect(
 	mcInformer mcfginformersv1.MachineConfigInformer,
 	nodeInformer coreinformersv1.NodeInformer,
 	ccInformer mcfginformersv1.ControllerConfigInformer,
+	mcpInformer mcfginformersv1.MachineConfigPoolInformer,
 	mcopClient mcopclientset.Interface,
 	kubeletHealthzEnabled bool,
 	kubeletHealthzEndpoint string,
@@ -396,6 +400,8 @@ func (dn *Daemon) ClusterConnect(
 	})
 	dn.ccLister = ccInformer.Lister()
 	dn.ccListerSynced = ccInformer.Informer().HasSynced
+	dn.mcpLister = mcpInformer.Lister()
+	dn.mcpListerSynced = mcpInformer.Informer().HasSynced

 	nw, err := newNodeWriter(dn.name, dn.stopCh)
 	if err != nil {
@@ -705,6 +711,12 @@ func (dn *Daemon) syncNode(key string) error {
 		return nil
 	}

+	// Get MCP associated with node
+	pool, err := helpers.GetPrimaryPoolNameForMCN(dn.mcpLister, node)
+	if err != nil {
+		return err
+	}
+
 	if node.Annotations[constants.MachineConfigDaemonPostConfigAction] == constants.MachineConfigDaemonStateRebooting {
 		klog.Info("Detected Rebooting Annotation, applying MCN.")
 		err := upgrademonitor.GenerateAndApplyMachineConfigNodes(
@@ -715,6 +727,7 @@ func (dn *Daemon) syncNode(key string) error {
 			node,
 			dn.mcfgClient,
 			dn.featureGatesAccessor,
+			pool,
 		)
 		if err != nil {
 			klog.Errorf("Error making MCN for Rebooted: %v", err)
@@ -790,6 +803,7 @@ func (dn *Daemon) syncNode(key string) error {
 			node,
 			dn.mcfgClient,
 			dn.featureGatesAccessor,
+			pool,
 		)
 		if err != nil {
 			klog.Errorf("Error making MCN for Resumed true: %v", err)
@@ -828,6 +842,7 @@ func (dn *Daemon) syncNode(key string) error {
 			dn.node,
 			dn.mcfgClient,
 			dn.featureGatesAccessor,
+			pool,
 		)
 		if err != nil {
 			klog.Errorf("Error making MCN for Updated false: %v", err)
@@ -852,6 +867,7 @@ func (dn *Daemon) syncNode(key string) error {
 			dn.node,
 			dn.mcfgClient,
 			dn.featureGatesAccessor,
+			pool,
 		)
 		if err != nil {
 			klog.Errorf("Error making MCN for Updated: %v", err)
@@ -1379,7 +1395,7 @@ func (dn *Daemon) Run(stopCh <-chan struct{}, exitCh <-chan error, errCh chan er
 	defer dn.ccQueue.ShutDown()
 	defer dn.preserveDaemonLogs()

-	if !cache.WaitForCacheSync(stopCh, dn.nodeListerSynced, dn.mcListerSynced, dn.ccListerSynced) {
+	if !cache.WaitForCacheSync(stopCh, dn.nodeListerSynced, dn.mcListerSynced, dn.ccListerSynced, dn.mcpListerSynced) {
 		return fmt.Errorf("failed to sync initial listers cache")
 	}
@@ -2300,6 +2316,13 @@ func (dn *Daemon) updateConfigAndState(state *stateAndConfigs) (bool, bool, erro
 	if inDesiredConfig {
 		// Great, we've successfully rebooted for the desired config,
 		// let's mark it done!
+
+		// Get MCP associated with node
+		pool, err := helpers.GetPrimaryPoolNameForMCN(dn.mcpLister, dn.node)
+		if err != nil {
+			return missingODC, inDesiredConfig, err
+		}
+
 		err = upgrademonitor.GenerateAndApplyMachineConfigNodes(
 			&upgrademonitor.Condition{State: mcfgalphav1.MachineConfigNodeResumed, Reason: string(mcfgalphav1.MachineConfigNodeResumed), Message: fmt.Sprintf("In desired config %s. Resumed normal operations. Applying proper annotations.", state.currentConfig.Name)},
 			nil,
@@ -2308,6 +2331,7 @@ func (dn *Daemon) updateConfigAndState(state *stateAndConfigs) (bool, bool, erro
 			dn.node,
 			dn.mcfgClient,
 			dn.featureGatesAccessor,
+			pool,
 		)
 		if err != nil {
 			klog.Errorf("Error making MCN for Resumed true: %v", err)
diff --git a/pkg/daemon/daemon_test.go b/pkg/daemon/daemon_test.go
index 63dc915827..e4ff64b164 100644
--- a/pkg/daemon/daemon_test.go
+++ b/pkg/daemon/daemon_test.go
@@ -161,6 +161,7 @@ func (f *fixture) newController() *Daemon {
 		i.Machineconfiguration().V1().MachineConfigs(),
 		k8sI.Core().V1().Nodes(),
 		i.Machineconfiguration().V1().ControllerConfigs(),
+		i.Machineconfiguration().V1().MachineConfigPools(),
 		f.oclient,
 		false,
 		"",
@@ -169,6 +170,7 @@ func (f *fixture) newController() *Daemon {
 	d.mcListerSynced = alwaysReady
 	d.nodeListerSynced = alwaysReady
+	d.mcpListerSynced = alwaysReady

 	stopCh := make(chan struct{})
 	defer close(stopCh)
diff --git a/pkg/daemon/drain.go b/pkg/daemon/drain.go
index 9bf0349b96..950e8b9719 100644
--- a/pkg/daemon/drain.go
+++ b/pkg/daemon/drain.go
@@ -15,6 +15,8 @@ import (
 	"github.com/openshift/machine-config-operator/pkg/apihelpers"
 	ctrlcommon "github.com/openshift/machine-config-operator/pkg/controller/common"
 	"github.com/openshift/machine-config-operator/pkg/daemon/constants"
+
+	"github.com/openshift/machine-config-operator/pkg/helpers"
 	"github.com/openshift/machine-config-operator/pkg/upgrademonitor"
 	corev1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -39,7 +41,14 @@ func (dn *Daemon) performDrain() error {
 	if !dn.drainRequired() {
 		logSystem("Drain not required, skipping")
-		err := upgrademonitor.GenerateAndApplyMachineConfigNodes(
+
+		// Get MCP associated with node
+		pool, err := helpers.GetPrimaryPoolNameForMCN(dn.mcpLister, dn.node)
+		if err != nil {
+			return err
+		}
+
+		err = upgrademonitor.GenerateAndApplyMachineConfigNodes(
 			&upgrademonitor.Condition{State: mcfgalphav1.MachineConfigNodeUpdateExecuted, Reason: string(mcfgalphav1.MachineConfigNodeUpdateDrained), Message: "Node Drain Not required for this update."},
 			&upgrademonitor.Condition{State: mcfgalphav1.MachineConfigNodeUpdateDrained, Reason: fmt.Sprintf("%s%s", string(mcfgalphav1.MachineConfigNodeUpdateExecuted), string(mcfgalphav1.MachineConfigNodeUpdateDrained)), Message: "Node Drain Not required for this update."},
 			metav1.ConditionUnknown,
@@ -47,6 +56,7 @@ func (dn *Daemon) performDrain() error {
 			dn.node,
 			dn.mcfgClient,
 			dn.featureGatesAccessor,
+			pool,
 		)
 		if err != nil {
 			klog.Errorf("Error making MCN for Drain not required: %v", err)
diff --git a/pkg/daemon/pinned_image_set.go b/pkg/daemon/pinned_image_set.go
index 4ae64a4b31..8831f98821 100644
--- a/pkg/daemon/pinned_image_set.go
+++ b/pkg/daemon/pinned_image_set.go
@@ -544,6 +544,12 @@ func (p *PinnedImageSetManager) updateStatusProgressing(pools []*mcfgv1.MachineC
 	}
 	imageSetSpec := getPinnedImageSetSpecForPools(pools)

+	// Get MCP associated with node
+	pool, err := helpers.GetPrimaryPoolNameForMCN(p.mcpLister, node)
+	if err != nil {
+		return err
+	}
+
 	return upgrademonitor.UpdateMachineConfigNodeStatus(
 		&upgrademonitor.Condition{
 			State: mcfgv1alpha1.MachineConfigNodePinnedImageSetsProgressing,
@@ -558,6 +564,7 @@ func (p *PinnedImageSetManager) updateStatusProgressing(pools []*mcfgv1.MachineC
 		applyCfg,
 		imageSetSpec,
 		p.featureGatesAccessor,
+		pool,
 	)
 }
@@ -574,6 +581,12 @@ func (p *PinnedImageSetManager) updateStatusProgressingComplete(pools []*mcfgv1.
 	}
 	imageSetSpec := getPinnedImageSetSpecForPools(pools)

+	// Get MCP associated with node
+	pool, err := helpers.GetPrimaryPoolNameForMCN(p.mcpLister, node)
+	if err != nil {
+		return err
+	}
+
 	err = upgrademonitor.UpdateMachineConfigNodeStatus(
 		&upgrademonitor.Condition{
 			State: mcfgv1alpha1.MachineConfigNodePinnedImageSetsProgressing,
@@ -588,6 +601,7 @@ func (p *PinnedImageSetManager) updateStatusProgressingComplete(pools []*mcfgv1.
 		applyCfg,
 		imageSetSpec,
 		p.featureGatesAccessor,
+		pool,
 	)
 	if err != nil {
 		klog.Errorf("Failed to updated machine config node: %v", err)
@@ -608,6 +622,7 @@ func (p *PinnedImageSetManager) updateStatusProgressingComplete(pools []*mcfgv1.
 		nil,
 		nil,
 		p.featureGatesAccessor,
+		pool,
 	)
 }
@@ -632,6 +647,12 @@ func (p *PinnedImageSetManager) updateStatusError(pools []*mcfgv1.MachineConfigP
 		errMsg = statusErr.Error()
 	}

+	// Get MCP associated with node
+	pool, err := helpers.GetPrimaryPoolNameForMCN(p.mcpLister, node)
+	if err != nil {
+		return err
+	}
+
 	return upgrademonitor.UpdateMachineConfigNodeStatus(
 		&upgrademonitor.Condition{
 			State: mcfgv1alpha1.MachineConfigNodePinnedImageSetsDegraded,
@@ -646,6 +667,7 @@ func (p *PinnedImageSetManager) updateStatusError(pools []*mcfgv1.MachineConfigP
 		applyCfg,
 		imageSetSpec,
 		p.featureGatesAccessor,
+		pool,
 	)
 }
diff --git a/pkg/daemon/update.go b/pkg/daemon/update.go
index 8692d310c6..f4117f9c50 100644
--- a/pkg/daemon/update.go
+++ b/pkg/daemon/update.go
@@ -43,6 +43,7 @@ import (
 	pivottypes "github.com/openshift/machine-config-operator/pkg/daemon/pivot/types"
 	pivotutils "github.com/openshift/machine-config-operator/pkg/daemon/pivot/utils"
 	"github.com/openshift/machine-config-operator/pkg/daemon/runtimeassets"
+	"github.com/openshift/machine-config-operator/pkg/helpers"
 	"github.com/openshift/machine-config-operator/pkg/upgrademonitor"
 )
@@ -123,7 +124,13 @@ func (dn *Daemon) executeReloadServiceNodeDisruptionAction(serviceName string, r
 		return fmt.Errorf("could not apply update: reloading %s configuration failed. Error: %w", serviceName, reloadErr)
 	}
-	err := upgrademonitor.GenerateAndApplyMachineConfigNodes(
+	// Get MCP associated with node
+	pool, err := helpers.GetPrimaryPoolNameForMCN(dn.mcpLister, dn.node)
+	if err != nil {
+		return err
+	}
+
+	err = upgrademonitor.GenerateAndApplyMachineConfigNodes(
 		&upgrademonitor.Condition{State: mcfgalphav1.MachineConfigNodeUpdatePostActionComplete, Reason: string(mcfgalphav1.MachineConfigNodeUpdateReloaded), Message: fmt.Sprintf("Node has reloaded service %s", serviceName)},
 		&upgrademonitor.Condition{State: mcfgalphav1.MachineConfigNodeUpdateReloaded, Reason: fmt.Sprintf("%s%s", string(mcfgalphav1.MachineConfigNodeUpdatePostActionComplete), string(mcfgalphav1.MachineConfigNodeUpdateReloaded)), Message: fmt.Sprintf("Upgrade required a service %s reload. Completed this this as a post update action.", serviceName)},
 		metav1.ConditionTrue,
@@ -131,6 +138,7 @@ func (dn *Daemon) executeReloadServiceNodeDisruptionAction(serviceName string, r
 		dn.node,
 		dn.mcfgClient,
 		dn.featureGatesAccessor,
+		pool,
 	)
 	if err != nil {
 		klog.Errorf("Error making MCN for Reloading success: %v", err)
@@ -157,6 +165,12 @@ func (dn *Daemon) performPostConfigChangeNodeDisruptionAction(postConfigChangeAc
 	logSystem("Performing post config change action: %v for config %s", action.Type, configName)

+	// Get MCP associated with node
+	pool, err := helpers.GetPrimaryPoolNameForMCN(dn.mcpLister, dn.node)
+	if err != nil {
+		return err
+	}
+
 	switch action.Type {
 	case opv1.RebootStatusAction:
 		err := upgrademonitor.GenerateAndApplyMachineConfigNodes(
@@ -167,6 +181,7 @@ func (dn *Daemon) performPostConfigChangeNodeDisruptionAction(postConfigChangeAc
 			dn.node,
 			dn.mcfgClient,
 			dn.featureGatesAccessor,
+			pool,
 		)
 		if err != nil {
 			klog.Errorf("Error making MCN for rebooting: %v", err)
@@ -186,6 +201,7 @@ func (dn *Daemon) performPostConfigChangeNodeDisruptionAction(postConfigChangeAc
 			dn.node,
 			dn.mcfgClient,
 			dn.featureGatesAccessor,
+			pool,
 		)
 		if err != nil {
 			klog.Errorf("Error making MCN for no post config change action: %v", err)
@@ -253,6 +269,12 @@ func (dn *Daemon) performPostConfigChangeNodeDisruptionAction(postConfigChangeAc
 // In the end uncordon node to schedule workload.
 // If at any point an error occurs, we reboot the node so that node has correct configuration.
 func (dn *Daemon) performPostConfigChangeAction(postConfigChangeActions []string, configName string) error {
+	// Get MCP associated with node
+	pool, err := helpers.GetPrimaryPoolNameForMCN(dn.mcpLister, dn.node)
+	if err != nil {
+		return err
+	}
+
 	if ctrlcommon.InSlice(postConfigChangeActionReboot, postConfigChangeActions) {
 		err := upgrademonitor.GenerateAndApplyMachineConfigNodes(
 			&upgrademonitor.Condition{State: mcfgalphav1.MachineConfigNodeUpdatePostActionComplete, Reason: string(mcfgalphav1.MachineConfigNodeUpdateRebooted), Message: fmt.Sprintf("Node will reboot into config %s", configName)},
@@ -262,6 +284,7 @@ func (dn *Daemon) performPostConfigChangeAction(postConfigChangeActions []string
 			dn.node,
 			dn.mcfgClient,
 			dn.featureGatesAccessor,
+			pool,
 		)
 		if err != nil {
 			klog.Errorf("Error making MCN for rebooting: %v", err)
@@ -282,6 +305,7 @@ func (dn *Daemon) performPostConfigChangeAction(postConfigChangeActions []string
 			dn.node,
 			dn.mcfgClient,
 			dn.featureGatesAccessor,
+			pool,
 		)
 		if err != nil {
 			klog.Errorf("Error making MCN for no post config change action: %v", err)
@@ -307,6 +331,7 @@ func (dn *Daemon) performPostConfigChangeAction(postConfigChangeActions []string
 			dn.node,
 			dn.mcfgClient,
 			dn.featureGatesAccessor,
+			pool,
 		)
 		if err != nil {
 			klog.Errorf("Error making MCN for Reloading success: %v", err)
@@ -1087,8 +1112,13 @@ func (dn *Daemon) update(oldConfig, newConfig *mcfgv1.MachineConfig, skipCertifi
 		return fmt.Errorf("parsing new Ignition config failed: %w", err)
 	}

-	klog.Infof("Checking Reconcilable for config %v to %v", oldConfigName, newConfigName)
+	// Get MCP associated with node
+	pool, err := helpers.GetPrimaryPoolNameForMCN(dn.mcpLister, dn.node)
+	if err != nil {
+		return err
+	}
+	klog.Infof("Checking Reconcilable for config %v to %v", oldConfigName, newConfigName)
 	// checking for reconcilability
 	// make sure we can actually reconcile this state
 	diff, reconcilableError := reconcilable(oldConfig, newConfig)
@@ -1102,6 +1132,7 @@ func (dn *Daemon) update(oldConfig, newConfig *mcfgv1.MachineConfig, skipCertifi
 		dn.node,
 		dn.mcfgClient,
 		dn.featureGatesAccessor,
+		pool,
 	)
 	if Nerr != nil {
 		klog.Errorf("Error making MCN for Preparing update failed: %v", err)
@@ -1148,6 +1179,7 @@ func (dn *Daemon) update(oldConfig, newConfig *mcfgv1.MachineConfig, skipCertifi
 		dn.node,
 		dn.mcfgClient,
 		dn.featureGatesAccessor,
+		pool,
 	)
 	if Nerr != nil {
 		klog.Errorf("Error making MCN for Preparing update failed: %v", err)
@@ -1182,19 +1214,11 @@ func (dn *Daemon) update(oldConfig, newConfig *mcfgv1.MachineConfig, skipCertifi
 		dn.node,
 		dn.mcfgClient,
 		dn.featureGatesAccessor,
+		pool,
 	)
 	if err != nil {
 		klog.Errorf("Error making MCN for Update Compatible: %v", err)
 	}
-	pool := ""
-	var ok bool
-	if dn.node != nil {
-		if _, ok = dn.node.Labels["node-role.kubernetes.io/worker"]; ok {
-			pool = "worker"
-		} else if _, ok = dn.node.Labels["node-role.kubernetes.io/master"]; ok {
-			pool = "master"
-		}
-	}

 	err = upgrademonitor.GenerateAndApplyMachineConfigNodeSpec(dn.featureGatesAccessor, pool, dn.node, dn.mcfgClient)
 	if err != nil {
@@ -1214,6 +1238,7 @@ func (dn *Daemon) update(oldConfig, newConfig *mcfgv1.MachineConfig, skipCertifi
 		dn.node,
 		dn.mcfgClient,
 		dn.featureGatesAccessor,
+		pool,
 	)
 	if err != nil {
 		klog.Errorf("Error making MCN for Drain not required: %v", err)
@@ -1241,6 +1266,7 @@ func (dn *Daemon) update(oldConfig, newConfig *mcfgv1.MachineConfig, skipCertifi
 		dn.node,
 		dn.mcfgClient,
 		dn.featureGatesAccessor,
+		pool,
 	)
 	if err != nil {
 		klog.Errorf("Error making MCN for Updating Files and OS: %v", err)
@@ -1353,6 +1379,7 @@ func (dn *Daemon) update(oldConfig, newConfig *mcfgv1.MachineConfig, skipCertifi
 		dn.node,
 		dn.mcfgClient,
 		dn.featureGatesAccessor,
+		pool,
 	)
 	if err != nil {
 		klog.Errorf("Error making MCN for Updated Files and OS: %v", err)
diff --git a/pkg/daemon/upgrade_monitor_test.go b/pkg/daemon/upgrade_monitor_test.go
index 8ea1a20362..53fa0cec7a 100644
--- a/pkg/daemon/upgrade_monitor_test.go
+++ b/pkg/daemon/upgrade_monitor_test.go
@@ -11,13 +11,13 @@ import (
 	"github.com/openshift/api/machineconfiguration/v1alpha1"
 	"github.com/openshift/client-go/machineconfiguration/clientset/versioned/fake"
 	informers "github.com/openshift/client-go/machineconfiguration/informers/externalversions"
+	mcopfake "github.com/openshift/client-go/operator/clientset/versioned/fake"
 	"github.com/openshift/library-go/pkg/operator/configobserver/featuregates"
+	"github.com/openshift/machine-config-operator/pkg/helpers"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/runtime"
 	kubeinformers "k8s.io/client-go/informers"
 	k8sfake "k8s.io/client-go/kubernetes/fake"
-
-	mcopfake "github.com/openshift/client-go/operator/clientset/versioned/fake"
 )

 type upgradeMonitorTestCase struct {
@@ -128,6 +128,7 @@ func (tc upgradeMonitorTestCase) run(t *testing.T) {
 		i.Machineconfiguration().V1().MachineConfigs(),
 		k8sI.Core().V1().Nodes(),
 		i.Machineconfiguration().V1().ControllerConfigs(),
+		i.Machineconfiguration().V1().MachineConfigPools(),
 		f.oclient,
 		false,
 		"",
@@ -136,6 +137,7 @@ func (tc upgradeMonitorTestCase) run(t *testing.T) {
 	d.mcListerSynced = alwaysReady
 	d.nodeListerSynced = alwaysReady
+	d.mcpListerSynced = alwaysReady

 	i.Start(stopCh)
 	i.WaitForCacheSync(stopCh)
@@ -151,7 +153,13 @@ func (tc upgradeMonitorTestCase) run(t *testing.T) {
 	}

 	for _, n := range f.nodeLister {
-		err = upgrademonitor.GenerateAndApplyMachineConfigNodes(tc.parentCondition, tc.childCondition, tc.parentStatus, tc.childStatus, n, d.mcfgClient, d.featureGatesAccessor)
+		// Get MCP associated with node
+		pool, err := helpers.GetPrimaryPoolNameForMCN(d.mcpLister, n)
+		if err != nil {
+			f.t.Fatalf("Error getting primary pool for node: %v", n.Name)
+		}
+
+		err = upgrademonitor.GenerateAndApplyMachineConfigNodes(tc.parentCondition, tc.childCondition, tc.parentStatus, tc.childStatus, n, d.mcfgClient, d.featureGatesAccessor, pool)
 		if err != nil {
 			f.t.Fatalf("Could not generate and apply MCN %v", err)
 		}
diff --git a/pkg/helpers/helpers.go b/pkg/helpers/helpers.go
index f8c4b71a59..ef8d79103b 100644
--- a/pkg/helpers/helpers.go
+++ b/pkg/helpers/helpers.go
@@ -50,6 +50,31 @@ func GetNodesForPool(mcpLister v1.MachineConfigPoolLister, nodeLister corev1list
 	return nodes, nil
 }

+// GetPrimaryPoolNameForMCN gets the MCP pool name value that is used in a node's MachineConfigNode object.
+// When the node does not yet exist (is nil) or the node does not yet have annotations, the pool name will
+// temporarily be set to `unknown`. Once the node exists (is not nil) and the annotations are properly set,
+// the node will update again and the pool name will be updated.
+func GetPrimaryPoolNameForMCN(mcpLister v1.MachineConfigPoolLister, node *corev1.Node) (string, error) {
+	// Handle case of nil node
+	if node == nil {
+		klog.Error("node object is nil, setting associated MCP to unknown")
+		return "unknown", nil
+	}
+
+	// Use `GetPrimaryPoolForNode` to get primary MCP associated with node
+	primaryPool, err := GetPrimaryPoolForNode(mcpLister, node)
+	if err != nil {
+		klog.Errorf("error getting primary pool for node: %v", node.Name)
+		return "", err
+	} else if primaryPool == nil {
+		// On first provisioning, the node may not have annotations and, thus, will not be associated with a pool.
+		// In this case, the pool value will be set to a temporary dummy value.
+		klog.Infof("No primary pool is associated with node: %v", node.Name)
+		return "unknown", nil
+	}
+	return primaryPool.Name, nil
+}
+
 func GetPrimaryPoolForNode(mcpLister v1.MachineConfigPoolLister, node *corev1.Node) (*mcfgv1.MachineConfigPool, error) {
 	pools, _, err := GetPoolsForNode(mcpLister, node)
 	if err != nil {
diff --git a/pkg/operator/sync.go b/pkg/operator/sync.go
index 16dc657073..532e147bf1 100644
--- a/pkg/operator/sync.go
+++ b/pkg/operator/sync.go
@@ -292,7 +292,7 @@ func (optr *Operator) syncRenderConfig(_ *renderConfig, _ *configv1.ClusterOpera
 	if optr.inClusterBringup {
 		klog.V(4).Info("Starting inClusterBringup informers cache sync")
 		// sync now our own informers after having installed the CRDs
-		if !cache.WaitForCacheSync(optr.stopCh, optr.ccListerSynced) {
+		if !cache.WaitForCacheSync(optr.stopCh, optr.ccListerSynced, optr.mcpListerSynced) {
 			return fmt.Errorf("failed to sync caches for informers")
 		}
 		klog.V(4).Info("Finished inClusterBringup informers cache sync")
@@ -768,15 +768,13 @@ func (optr *Operator) syncMachineConfigNodes(_ *renderConfig, _ *configv1.Cluste
 		if node.Status.Phase == corev1.NodePending || node.Status.Phase == corev1.NodePhase("Provisioning") {
 			continue
 		}
-		var pool string
-		var ok bool
-		if _, ok = node.Labels["node-role.kubernetes.io/worker"]; ok {
-			pool = "worker"
-		} else if _, ok = node.Labels["node-role.kubernetes.io/master"]; ok {
-			pool = "master"
-		} else if _, ok = node.Labels["node-role.kubernetes.io/arbiter"]; ok {
-			pool = "arbiter"
+
+		// Get MCP associated with node
+		pool, err := helpers.GetPrimaryPoolNameForMCN(optr.mcpLister, node)
+		if err != nil {
+			return err
 		}
+
 		newMCS := &v1alpha1.MachineConfigNode{
 			Spec: v1alpha1.MachineConfigNodeSpec{
 				Node: v1alpha1.MCOObjectReference{
diff --git a/pkg/upgrademonitor/upgrade_monitor.go b/pkg/upgrademonitor/upgrade_monitor.go
index 55deb62248..7b7ad9d008 100644
--- a/pkg/upgrademonitor/upgrade_monitor.go
+++ b/pkg/upgrademonitor/upgrade_monitor.go
@@ -28,10 +28,11 @@ type Condition struct {
 	Message string
 }

-// GenerateAndApplyMachineConfigNodes takes a parent and child conditions and applies them to the given node's MachineConfigNode object
-// there are a few stipulations. 1) if the parent and child condition exactly match their currently applied statuses, no new MCN is generated
+// GenerateAndApplyMachineConfigNodes takes a parent and child condition and applies them to the given node's MachineConfigNode object
+// there are a few stipulations:
+// 1) if the parent and child condition exactly match their currently applied statuses, no new MCN is generated
 // 2) the desiredConfig in the MCN Status will only be set once the update is proven to be compatible. Meanwhile the desired and current config in the spec react to live changes of state on the Node
-// 3) None of this will be executed unless the TechPreviewNoUpgrade featuregate is applied.
+// 3) none of this will be executed unless the TechPreviewNoUpgrade featuregate is applied.
 //TODO (ijanssen): Remove comment once feature gate is graduated to default.
 func GenerateAndApplyMachineConfigNodes(
 	parentCondition, childCondition *Condition,
@@ -40,8 +41,9 @@ func GenerateAndApplyMachineConfigNodes(
 	node *corev1.Node,
 	mcfgClient mcfgclientset.Interface,
 	fgAccessor featuregates.FeatureGateAccess,
+	pool string,
 ) error {
-	return generateAndApplyMachineConfigNodes(parentCondition, childCondition, parentStatus, childStatus, node, mcfgClient, nil, nil, fgAccessor)
+	return generateAndApplyMachineConfigNodes(parentCondition, childCondition, parentStatus, childStatus, node, mcfgClient, nil, nil, fgAccessor, pool)
 }

 func UpdateMachineConfigNodeStatus(
@@ -54,8 +56,9 @@ func UpdateMachineConfigNodeStatus(
 	imageSetApplyConfig []*machineconfigurationalphav1.MachineConfigNodeStatusPinnedImageSetApplyConfiguration,
 	imageSetSpec []mcfgalphav1.MachineConfigNodeSpecPinnedImageSet,
 	fgAccessor featuregates.FeatureGateAccess,
+	pool string,
 ) error {
-	return generateAndApplyMachineConfigNodes(parentCondition, childCondition, parentStatus, childStatus, node, mcfgClient, imageSetApplyConfig, imageSetSpec, fgAccessor)
+	return generateAndApplyMachineConfigNodes(parentCondition, childCondition, parentStatus, childStatus, node, mcfgClient, imageSetApplyConfig, imageSetSpec, fgAccessor, pool)
 }

 // Helper function to convert metav1.Condition to ConditionApplyConfiguration
@@ -89,6 +92,7 @@ func generateAndApplyMachineConfigNodes(
 	imageSetApplyConfig []*machineconfigurationalphav1.MachineConfigNodeStatusPinnedImageSetApplyConfiguration,
 	imageSetSpec []mcfgalphav1.MachineConfigNodeSpecPinnedImageSet,
 	fgAccessor featuregates.FeatureGateAccess,
+	pool string,
 ) error {
 	if fgAccessor == nil || node == nil || parentCondition == nil || mcfgClient == nil {
 		return nil
@@ -102,14 +106,6 @@ func generateAndApplyMachineConfigNodes(
 		return nil
 	}

-	var pool string
-	var ok bool
-	if _, ok = node.Labels["node-role.kubernetes.io/worker"]; ok {
-		pool = "worker"
-	} else if _, ok = node.Labels["node-role.kubernetes.io/master"]; ok {
-		pool = "master"
-	}
-
 	// get the existing MCN, or if it DNE create one below
 	mcNode, needNewMCNode := createOrGetMachineConfigNode(mcfgClient, node)
 	newMCNode := mcNode.DeepCopy()
diff --git a/test/e2e-techpreview/mcn_test.go b/test/e2e-techpreview/mcn_test.go
index 3eb8f75b8c..6e1fc0ce79 100644
--- a/test/e2e-techpreview/mcn_test.go
+++ b/test/e2e-techpreview/mcn_test.go
@@ -2,12 +2,15 @@ package e2e_techpreview_test

 import (
 	"bytes"
+	"context"
 	"os/exec"
 	"testing"

 	"github.com/openshift/machine-config-operator/test/framework"
 	"github.com/openshift/machine-config-operator/test/helpers"
 	"github.com/stretchr/testify/require"
+
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 )

 func TestMCNScopeSadPath(t *testing.T) {
@@ -53,3 +56,37 @@ func TestMCNScopeHappyPath(t *testing.T) {
 	// This oc command effectively use the service account of the nodeUnderTest's MCD pod, which should only be able to edit nodeUnderTest's MCN.
 	helpers.ExecCmdOnNode(t, cs, nodeUnderTest, "chroot", "/rootfs", "oc", "patch", "machineconfignodes", nodeUnderTest.Name, "--type=merge", "-p", "{\"spec\":{\"configVersion\":{\"desired\":\"rendered-worker-test\"}}}")
 }
+
+// `TestMCNPoolNameDefault` checks that the MCP name is correctly populated in a node's MCN object for default MCPs
+func TestMCNPoolNameDefault(t *testing.T) {
+
+	cs := framework.NewClientSet("")
+
+	// Grab a random node from each default pool
+	workerNode := helpers.GetRandomNode(t, cs, "worker")
+	masterNode := helpers.GetRandomNode(t, cs, "master")
+
+	// Test that MCN pool name value matches MCP association
+	workerNodeMCN, workerErr := cs.MachineconfigurationV1alpha1Interface.MachineConfigNodes().Get(context.TODO(), workerNode.Name, metav1.GetOptions{})
+	require.Equal(t, "worker", workerNodeMCN.Spec.Pool.Name)
+	require.NoError(t, workerErr)
+	masterNodeMCN, masterErr := cs.MachineconfigurationV1alpha1Interface.MachineConfigNodes().Get(context.TODO(), masterNode.Name, metav1.GetOptions{})
+	require.Equal(t, "master", masterNodeMCN.Spec.Pool.Name)
+	require.NoError(t, masterErr)
+}
+
+// `TestMCNPoolNameCustom` checks that the MCP name is correctly populated in a node's MCN object for custom MCPs
+func TestMCNPoolNameCustom(t *testing.T) {
+
+	cs := framework.NewClientSet("")
+
+	// Create a custom MCP and assign a worker node to it
+	customMCPName := "infra"
+	customNode := helpers.GetRandomNode(t, cs, "worker")
+	helpers.CreatePoolWithNode(t, cs, customMCPName, customNode)
+
+	// Test that MCN pool name value matches MCP association
+	customNodeMCN, customErr := cs.MachineconfigurationV1alpha1Interface.MachineConfigNodes().Get(context.TODO(), customNode.Name, metav1.GetOptions{})
+	require.Equal(t, customMCPName, customNodeMCN.Spec.Pool.Name)
+	require.NoError(t, customErr)
+}
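For readers picking up the new helper, the following is a minimal, hypothetical sketch (not part of the patch above) of how GetPrimaryPoolNameForMCN resolves the pool name that the callers in this change pass to GenerateAndApplyMachineConfigNodes. It seeds a fake clientset and a MachineConfigPool informer in the same way upgrade_monitor_test.go does; the pool and node objects here are illustrative only.

// Minimal sketch: resolve the MCN pool name with the helper added in pkg/helpers/helpers.go.
// Assumes a checkout that includes this change; the objects below are illustrative.
package main

import (
	"fmt"

	mcfgv1 "github.com/openshift/api/machineconfiguration/v1"
	fakemcfg "github.com/openshift/client-go/machineconfiguration/clientset/versioned/fake"
	mcfginformers "github.com/openshift/client-go/machineconfiguration/informers/externalversions"
	"github.com/openshift/machine-config-operator/pkg/helpers"
	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

func main() {
	// A "worker" pool whose nodeSelector matches the node's role label.
	workerPool := &mcfgv1.MachineConfigPool{
		ObjectMeta: metav1.ObjectMeta{Name: "worker"},
		Spec: mcfgv1.MachineConfigPoolSpec{
			NodeSelector: &metav1.LabelSelector{
				MatchLabels: map[string]string{"node-role.kubernetes.io/worker": ""},
			},
		},
	}
	node := &corev1.Node{
		ObjectMeta: metav1.ObjectMeta{
			Name:   "node-0",
			Labels: map[string]string{"node-role.kubernetes.io/worker": ""},
		},
	}

	// Seed a fake clientset and sync an MCP informer so the lister can see the pool.
	client := fakemcfg.NewSimpleClientset(workerPool)
	factory := mcfginformers.NewSharedInformerFactory(client, 0)
	mcpInformer := factory.Machineconfiguration().V1().MachineConfigPools()
	stopCh := make(chan struct{})
	defer close(stopCh)
	factory.Start(stopCh)
	factory.WaitForCacheSync(stopCh)

	// Resolves to "worker"; a nil node or a node that matches no pool yields "unknown".
	pool, err := helpers.GetPrimaryPoolNameForMCN(mcpInformer.Lister(), node)
	fmt.Println(pool, err)
}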