Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

MCO-1501: Add support for custom MCPs in MCN #4876

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cmd/machine-config-controller/start.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ func runStartCmd(_ *cobra.Command, _ []string) {
draincontroller := drain.New(
drain.DefaultConfig(),
ctrlctx.KubeInformerFactory.Core().V1().Nodes(),
ctrlctx.InformerFactory.Machineconfiguration().V1().MachineConfigPools(),
ctrlctx.ClientBuilder.KubeClientOrDie("node-update-controller"),
ctrlctx.ClientBuilder.MachineConfigClientOrDie("node-update-controller"),
ctrlctx.FeatureGateAccess,
Expand Down
1 change: 1 addition & 0 deletions cmd/machine-config-daemon/start.go
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,7 @@ func runStartCmd(_ *cobra.Command, _ []string) {
ctrlctx.InformerFactory.Machineconfiguration().V1().MachineConfigs(),
ctrlctx.KubeInformerFactory.Core().V1().Nodes(),
ctrlctx.InformerFactory.Machineconfiguration().V1().ControllerConfigs(),
ctrlctx.InformerFactory.Machineconfiguration().V1().MachineConfigPools(),
ctrlctx.ClientBuilder.OperatorClientOrDie(componentName),
startOpts.kubeletHealthzEnabled,
startOpts.kubeletHealthzEndpoint,
Expand Down
33 changes: 31 additions & 2 deletions pkg/controller/drain/drain_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
"github.com/openshift/library-go/pkg/operator/configobserver/featuregates"
ctrlcommon "github.com/openshift/machine-config-operator/pkg/controller/common"
daemonconsts "github.com/openshift/machine-config-operator/pkg/daemon/constants"
"github.com/openshift/machine-config-operator/pkg/helpers"
"github.com/openshift/machine-config-operator/pkg/upgrademonitor"

apierrors "k8s.io/apimachinery/pkg/api/errors"
Expand All @@ -23,6 +24,8 @@ import (
"k8s.io/apimachinery/pkg/util/strategicpatch"
"k8s.io/apimachinery/pkg/util/wait"

mcfginformersv1 "github.com/openshift/client-go/machineconfiguration/informers/externalversions/machineconfiguration/v1"
mcfglistersv1 "github.com/openshift/client-go/machineconfiguration/listers/machineconfiguration/v1"
corev1 "k8s.io/api/core/v1"
coreinformersv1 "k8s.io/client-go/informers/core/v1"
clientset "k8s.io/client-go/kubernetes"
Expand Down Expand Up @@ -98,6 +101,9 @@ type Controller struct {
nodeLister corelisterv1.NodeLister
nodeListerSynced cache.InformerSynced

mcpLister mcfglistersv1.MachineConfigPoolLister
mcpListerSynced cache.InformerSynced

queue workqueue.TypedRateLimitingInterface[string]
ongoingDrains map[string]time.Time

Expand All @@ -110,6 +116,7 @@ type Controller struct {
func New(
cfg Config,
nodeInformer coreinformersv1.NodeInformer,
mcpInformer mcfginformersv1.MachineConfigPoolInformer,
kubeClient clientset.Interface,
mcfgClient mcfgclientset.Interface,
fgAccessor featuregates.FeatureGateAccess,
Expand Down Expand Up @@ -140,6 +147,9 @@ func New(
ctrl.nodeLister = nodeInformer.Lister()
ctrl.nodeListerSynced = nodeInformer.Informer().HasSynced

ctrl.mcpLister = mcpInformer.Lister()
ctrl.mcpListerSynced = mcpInformer.Informer().HasSynced

return ctrl
}

Expand All @@ -159,7 +169,7 @@ func (ctrl *Controller) Run(workers int, stopCh <-chan struct{}) {
defer utilruntime.HandleCrash()
defer ctrl.queue.ShutDown()

if !cache.WaitForCacheSync(stopCh, ctrl.nodeListerSynced) {
if !cache.WaitForCacheSync(stopCh, ctrl.nodeListerSynced, ctrl.mcpListerSynced) {
return
}

Expand Down Expand Up @@ -305,6 +315,12 @@ func (ctrl *Controller) syncNode(key string) error {
Ctx: context.TODO(),
}

// Get MCP associated with node
pool, err := helpers.GetPrimaryPoolNameForMCN(ctrl.mcpLister, node)
if err != nil {
return err
}

desiredVerb := strings.Split(desiredState, "-")[0]
switch desiredVerb {
case daemonconsts.DrainerStateUncordon:
Expand All @@ -318,6 +334,7 @@ func (ctrl *Controller) syncNode(key string) error {
node,
ctrl.client,
ctrl.featureGatesAccessor,
pool,
)
if nErr != nil {
klog.Errorf("Error making MCN for Uncordon failure: %v", err)
Expand All @@ -333,6 +350,7 @@ func (ctrl *Controller) syncNode(key string) error {
node,
ctrl.client,
ctrl.featureGatesAccessor,
pool,
)
if err != nil {
klog.Errorf("Error making MCN for UnCordon success: %v", err)
Expand Down Expand Up @@ -387,6 +405,12 @@ func (ctrl *Controller) drainNode(node *corev1.Node, drainer *drain.Helper) erro
break
}

// Get MCP associated with node
pool, err := helpers.GetPrimaryPoolNameForMCN(ctrl.mcpLister, node)
if err != nil {
return err
}

if !isOngoingDrain {
ctrl.logNode(node, "cordoning")
// perform cordon
Expand All @@ -398,6 +422,7 @@ func (ctrl *Controller) drainNode(node *corev1.Node, drainer *drain.Helper) erro
node,
ctrl.client,
ctrl.featureGatesAccessor,
pool,
)
if Nerr != nil {
klog.Errorf("Error making MCN for Cordon Failure: %v", Nerr)
Expand All @@ -412,6 +437,7 @@ func (ctrl *Controller) drainNode(node *corev1.Node, drainer *drain.Helper) erro
node,
ctrl.client,
ctrl.featureGatesAccessor,
pool,
)
if err != nil {
klog.Errorf("Error making MCN for Cordon Success: %v", err)
Expand All @@ -420,14 +446,15 @@ func (ctrl *Controller) drainNode(node *corev1.Node, drainer *drain.Helper) erro

// Attempt drain
ctrl.logNode(node, "initiating drain")
err := upgrademonitor.GenerateAndApplyMachineConfigNodes(
err = upgrademonitor.GenerateAndApplyMachineConfigNodes(
&upgrademonitor.Condition{State: v1alpha1.MachineConfigNodeUpdateExecuted, Reason: string(v1alpha1.MachineConfigNodeUpdateDrained), Message: "Draining Node as part of update executed phase"},
&upgrademonitor.Condition{State: v1alpha1.MachineConfigNodeUpdateDrained, Reason: fmt.Sprintf("%s%s", string(v1alpha1.MachineConfigNodeUpdateExecuted), string(v1alpha1.MachineConfigNodeUpdateDrained)), Message: fmt.Sprintf("Draining node. The drain will not be complete until desired drainer %s matches current drainer %s", node.Annotations[daemonconsts.DesiredDrainerAnnotationKey], node.Annotations[daemonconsts.LastAppliedDrainerAnnotationKey])},
metav1.ConditionUnknown,
metav1.ConditionUnknown,
node,
ctrl.client,
ctrl.featureGatesAccessor,
pool,
)
if err != nil {
klog.Errorf("Error making MCN for Drain beginning: %v", err)
Expand Down Expand Up @@ -457,6 +484,7 @@ func (ctrl *Controller) drainNode(node *corev1.Node, drainer *drain.Helper) erro
node,
ctrl.client,
ctrl.featureGatesAccessor,
pool,
)
if nErr != nil {
klog.Errorf("Error making MCN for Drain failure: %v", nErr)
Expand All @@ -473,6 +501,7 @@ func (ctrl *Controller) drainNode(node *corev1.Node, drainer *drain.Helper) erro
node,
ctrl.client,
ctrl.featureGatesAccessor,
pool,
)
if err != nil {
klog.Errorf("Error making MCN for Drain success: %v", err)
Expand Down
30 changes: 27 additions & 3 deletions pkg/daemon/daemon.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,9 @@ import (
mcoResourceRead "github.com/openshift/machine-config-operator/lib/resourceread"
ctrlcommon "github.com/openshift/machine-config-operator/pkg/controller/common"
"github.com/openshift/machine-config-operator/pkg/daemon/constants"
"github.com/openshift/machine-config-operator/pkg/upgrademonitor"

"github.com/openshift/machine-config-operator/pkg/daemon/osrelease"
"github.com/openshift/machine-config-operator/pkg/helpers"
"github.com/openshift/machine-config-operator/pkg/upgrademonitor"
)

// Daemon is the dispatch point for the functions of the agent on the
Expand Down Expand Up @@ -100,6 +100,9 @@ type Daemon struct {
mcLister mcfglistersv1.MachineConfigLister
mcListerSynced cache.InformerSynced

mcpLister mcfglistersv1.MachineConfigPoolLister
mcpListerSynced cache.InformerSynced

ccLister mcfglistersv1.ControllerConfigLister
ccListerSynced cache.InformerSynced

Expand Down Expand Up @@ -362,6 +365,7 @@ func (dn *Daemon) ClusterConnect(
mcInformer mcfginformersv1.MachineConfigInformer,
nodeInformer coreinformersv1.NodeInformer,
ccInformer mcfginformersv1.ControllerConfigInformer,
mcpInformer mcfginformersv1.MachineConfigPoolInformer,
mcopClient mcopclientset.Interface,
kubeletHealthzEnabled bool,
kubeletHealthzEndpoint string,
Expand Down Expand Up @@ -396,6 +400,8 @@ func (dn *Daemon) ClusterConnect(
})
dn.ccLister = ccInformer.Lister()
dn.ccListerSynced = ccInformer.Informer().HasSynced
dn.mcpLister = mcpInformer.Lister()
dn.mcpListerSynced = mcpInformer.Informer().HasSynced

nw, err := newNodeWriter(dn.name, dn.stopCh)
if err != nil {
Expand Down Expand Up @@ -705,6 +711,12 @@ func (dn *Daemon) syncNode(key string) error {
return nil
}

// Get MCP associated with node
pool, err := helpers.GetPrimaryPoolNameForMCN(dn.mcpLister, node)
if err != nil {
return err
}

if node.Annotations[constants.MachineConfigDaemonPostConfigAction] == constants.MachineConfigDaemonStateRebooting {
klog.Info("Detected Rebooting Annotation, applying MCN.")
err := upgrademonitor.GenerateAndApplyMachineConfigNodes(
Expand All @@ -715,6 +727,7 @@ func (dn *Daemon) syncNode(key string) error {
node,
dn.mcfgClient,
dn.featureGatesAccessor,
pool,
)
if err != nil {
klog.Errorf("Error making MCN for Rebooted: %v", err)
Expand Down Expand Up @@ -790,6 +803,7 @@ func (dn *Daemon) syncNode(key string) error {
node,
dn.mcfgClient,
dn.featureGatesAccessor,
pool,
)
if err != nil {
klog.Errorf("Error making MCN for Resumed true: %v", err)
Expand Down Expand Up @@ -828,6 +842,7 @@ func (dn *Daemon) syncNode(key string) error {
dn.node,
dn.mcfgClient,
dn.featureGatesAccessor,
pool,
)
if err != nil {
klog.Errorf("Error making MCN for Updated false: %v", err)
Expand All @@ -852,6 +867,7 @@ func (dn *Daemon) syncNode(key string) error {
dn.node,
dn.mcfgClient,
dn.featureGatesAccessor,
pool,
)
if err != nil {
klog.Errorf("Error making MCN for Updated: %v", err)
Expand Down Expand Up @@ -1379,7 +1395,7 @@ func (dn *Daemon) Run(stopCh <-chan struct{}, exitCh <-chan error, errCh chan er
defer dn.ccQueue.ShutDown()
defer dn.preserveDaemonLogs()

if !cache.WaitForCacheSync(stopCh, dn.nodeListerSynced, dn.mcListerSynced, dn.ccListerSynced) {
if !cache.WaitForCacheSync(stopCh, dn.nodeListerSynced, dn.mcListerSynced, dn.ccListerSynced, dn.mcpListerSynced) {
return fmt.Errorf("failed to sync initial listers cache")
}

Expand Down Expand Up @@ -2300,6 +2316,13 @@ func (dn *Daemon) updateConfigAndState(state *stateAndConfigs) (bool, bool, erro
if inDesiredConfig {
// Great, we've successfully rebooted for the desired config,
// let's mark it done!

// Get MCP associated with node
pool, err := helpers.GetPrimaryPoolNameForMCN(dn.mcpLister, dn.node)
if err != nil {
return missingODC, inDesiredConfig, err
}

err = upgrademonitor.GenerateAndApplyMachineConfigNodes(
&upgrademonitor.Condition{State: mcfgalphav1.MachineConfigNodeResumed, Reason: string(mcfgalphav1.MachineConfigNodeResumed), Message: fmt.Sprintf("In desired config %s. Resumed normal operations. Applying proper annotations.", state.currentConfig.Name)},
nil,
Expand All @@ -2308,6 +2331,7 @@ func (dn *Daemon) updateConfigAndState(state *stateAndConfigs) (bool, bool, erro
dn.node,
dn.mcfgClient,
dn.featureGatesAccessor,
pool,
)
if err != nil {
klog.Errorf("Error making MCN for Resumed true: %v", err)
Expand Down
2 changes: 2 additions & 0 deletions pkg/daemon/daemon_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,7 @@ func (f *fixture) newController() *Daemon {
i.Machineconfiguration().V1().MachineConfigs(),
k8sI.Core().V1().Nodes(),
i.Machineconfiguration().V1().ControllerConfigs(),
i.Machineconfiguration().V1().MachineConfigPools(),
f.oclient,
false,
"",
Expand All @@ -169,6 +170,7 @@ func (f *fixture) newController() *Daemon {

d.mcListerSynced = alwaysReady
d.nodeListerSynced = alwaysReady
d.mcpListerSynced = alwaysReady

stopCh := make(chan struct{})
defer close(stopCh)
Expand Down
12 changes: 11 additions & 1 deletion pkg/daemon/drain.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ import (
"github.com/openshift/machine-config-operator/pkg/apihelpers"
ctrlcommon "github.com/openshift/machine-config-operator/pkg/controller/common"
"github.com/openshift/machine-config-operator/pkg/daemon/constants"

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: extra blank line between these imports

"github.com/openshift/machine-config-operator/pkg/helpers"
"github.com/openshift/machine-config-operator/pkg/upgrademonitor"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
Expand All @@ -39,14 +41,22 @@ func (dn *Daemon) performDrain() error {

if !dn.drainRequired() {
logSystem("Drain not required, skipping")
err := upgrademonitor.GenerateAndApplyMachineConfigNodes(

// Get MCP associated with node
pool, err := helpers.GetPrimaryPoolNameForMCN(dn.mcpLister, dn.node)
if err != nil {
return err
}

err = upgrademonitor.GenerateAndApplyMachineConfigNodes(
&upgrademonitor.Condition{State: mcfgalphav1.MachineConfigNodeUpdateExecuted, Reason: string(mcfgalphav1.MachineConfigNodeUpdateDrained), Message: "Node Drain Not required for this update."},
&upgrademonitor.Condition{State: mcfgalphav1.MachineConfigNodeUpdateDrained, Reason: fmt.Sprintf("%s%s", string(mcfgalphav1.MachineConfigNodeUpdateExecuted), string(mcfgalphav1.MachineConfigNodeUpdateDrained)), Message: "Node Drain Not required for this update."},
metav1.ConditionUnknown,
metav1.ConditionFalse,
dn.node,
dn.mcfgClient,
dn.featureGatesAccessor,
pool,
)
if err != nil {
klog.Errorf("Error making MCN for Drain not required: %v", err)
Expand Down
22 changes: 22 additions & 0 deletions pkg/daemon/pinned_image_set.go
Original file line number Diff line number Diff line change
Expand Up @@ -544,6 +544,12 @@ func (p *PinnedImageSetManager) updateStatusProgressing(pools []*mcfgv1.MachineC
}
imageSetSpec := getPinnedImageSetSpecForPools(pools)

// Get MCP associated with node
pool, err := helpers.GetPrimaryPoolNameForMCN(p.mcpLister, node)
if err != nil {
return err
}

return upgrademonitor.UpdateMachineConfigNodeStatus(
&upgrademonitor.Condition{
State: mcfgv1alpha1.MachineConfigNodePinnedImageSetsProgressing,
Expand All @@ -558,6 +564,7 @@ func (p *PinnedImageSetManager) updateStatusProgressing(pools []*mcfgv1.MachineC
applyCfg,
imageSetSpec,
p.featureGatesAccessor,
pool,
)
}

Expand All @@ -574,6 +581,12 @@ func (p *PinnedImageSetManager) updateStatusProgressingComplete(pools []*mcfgv1.
}
imageSetSpec := getPinnedImageSetSpecForPools(pools)

// Get MCP associated with node
pool, err := helpers.GetPrimaryPoolNameForMCN(p.mcpLister, node)
if err != nil {
return err
}

err = upgrademonitor.UpdateMachineConfigNodeStatus(
&upgrademonitor.Condition{
State: mcfgv1alpha1.MachineConfigNodePinnedImageSetsProgressing,
Expand All @@ -588,6 +601,7 @@ func (p *PinnedImageSetManager) updateStatusProgressingComplete(pools []*mcfgv1.
applyCfg,
imageSetSpec,
p.featureGatesAccessor,
pool,
)
if err != nil {
klog.Errorf("Failed to updated machine config node: %v", err)
Expand All @@ -608,6 +622,7 @@ func (p *PinnedImageSetManager) updateStatusProgressingComplete(pools []*mcfgv1.
nil,
nil,
p.featureGatesAccessor,
pool,
)
}

Expand All @@ -632,6 +647,12 @@ func (p *PinnedImageSetManager) updateStatusError(pools []*mcfgv1.MachineConfigP
errMsg = statusErr.Error()
}

// Get MCP associated with node
pool, err := helpers.GetPrimaryPoolNameForMCN(p.mcpLister, node)
if err != nil {
return err
}

return upgrademonitor.UpdateMachineConfigNodeStatus(
&upgrademonitor.Condition{
State: mcfgv1alpha1.MachineConfigNodePinnedImageSetsDegraded,
Expand All @@ -646,6 +667,7 @@ func (p *PinnedImageSetManager) updateStatusError(pools []*mcfgv1.MachineConfigP
applyCfg,
imageSetSpec,
p.featureGatesAccessor,
pool,
)
}

Expand Down
Loading