Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

(Not ready for review) MCO-1501: Add support for custom MCPs in MCN #4835

Closed
wants to merge 37 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
377d58e
start updating mcp setting for mcn
isabella-janssen Feb 6, 2025
bd3cf9a
placeholder comment
isabella-janssen Feb 7, 2025
f2b3e92
Merge branch 'master' of https://github.com/isabella-janssen/machine-…
isabella-janssen Feb 7, 2025
7f45386
testing cluster creation
isabella-janssen Feb 8, 2025
662753a
test cluster creation
isabella-janssen Feb 8, 2025
c7c2e48
testing cluster install
isabella-janssen Feb 9, 2025
4b0b1ab
testing cluster install
isabella-janssen Feb 9, 2025
19d2bac
testing cluster install
isabella-janssen Feb 10, 2025
1818219
test cluster install
isabella-janssen Feb 10, 2025
f17b76f
test cluster install
isabella-janssen Feb 10, 2025
25587bf
testing for cluster install
isabella-janssen Feb 10, 2025
9ab2834
testing cluster creation
isabella-janssen Feb 11, 2025
e6ed1b3
test cluster install
isabella-janssen Feb 11, 2025
bb1a255
cluster install test
isabella-janssen Feb 11, 2025
ea4ad65
test cluster install
isabella-janssen Feb 12, 2025
dceb03a
update testing strings to help with debugging
isabella-janssen Feb 12, 2025
cc52062
add default mcp check for sync node func
isabella-janssen Feb 12, 2025
35daeeb
testing install
isabella-janssen Feb 12, 2025
9f7b30f
test install changing sync order
isabella-janssen Feb 12, 2025
97d618a
test install changing sync order
isabella-janssen Feb 12, 2025
4ea6457
testing cluster install
isabella-janssen Feb 13, 2025
64e4ad3
test cluster install
isabella-janssen Feb 13, 2025
a80bedf
update to test cluster install
isabella-janssen Feb 13, 2025
82bacd5
testing for cluster install
isabella-janssen Feb 14, 2025
2b401f4
testing install
isabella-janssen Feb 14, 2025
0889ad6
code cleanup
isabella-janssen Feb 14, 2025
d928e20
code cleanup
isabella-janssen Feb 14, 2025
2d6dd88
testing install fail point
isabella-janssen Feb 14, 2025
45c3cb5
add nil check for get primary pool
isabella-janssen Feb 17, 2025
140253f
test install
isabella-janssen Feb 17, 2025
8c1e721
add node nil check
isabella-janssen Feb 17, 2025
2ae588a
remove reference to nil value
isabella-janssen Feb 18, 2025
4110b25
add nil check to helper func
isabella-janssen Feb 18, 2025
cc9e73b
use new helper function with nil checks
isabella-janssen Feb 18, 2025
eee617b
change order of sync back to original
isabella-janssen Feb 18, 2025
94b88cc
use mcn pool helper
isabella-janssen Feb 18, 2025
4c5a795
cleanup
isabella-janssen Feb 18, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cmd/machine-config-controller/start.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ func runStartCmd(_ *cobra.Command, _ []string) {
draincontroller := drain.New(
drain.DefaultConfig(),
ctrlctx.KubeInformerFactory.Core().V1().Nodes(),
ctrlctx.InformerFactory.Machineconfiguration().V1().MachineConfigPools(),
ctrlctx.ClientBuilder.KubeClientOrDie("node-update-controller"),
ctrlctx.ClientBuilder.MachineConfigClientOrDie("node-update-controller"),
ctrlctx.FeatureGateAccess,
Expand Down
1 change: 1 addition & 0 deletions cmd/machine-config-daemon/start.go
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,7 @@ func runStartCmd(_ *cobra.Command, _ []string) {
ctrlctx.InformerFactory.Machineconfiguration().V1().MachineConfigs(),
ctrlctx.KubeInformerFactory.Core().V1().Nodes(),
ctrlctx.InformerFactory.Machineconfiguration().V1().ControllerConfigs(),
ctrlctx.InformerFactory.Machineconfiguration().V1().MachineConfigPools(),
ctrlctx.ClientBuilder.OperatorClientOrDie(componentName),
startOpts.kubeletHealthzEnabled,
startOpts.kubeletHealthzEndpoint,
Expand Down
33 changes: 31 additions & 2 deletions pkg/controller/drain/drain_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
"github.com/openshift/library-go/pkg/operator/configobserver/featuregates"
ctrlcommon "github.com/openshift/machine-config-operator/pkg/controller/common"
daemonconsts "github.com/openshift/machine-config-operator/pkg/daemon/constants"
"github.com/openshift/machine-config-operator/pkg/helpers"
"github.com/openshift/machine-config-operator/pkg/upgrademonitor"

apierrors "k8s.io/apimachinery/pkg/api/errors"
Expand All @@ -23,6 +24,8 @@ import (
"k8s.io/apimachinery/pkg/util/strategicpatch"
"k8s.io/apimachinery/pkg/util/wait"

mcfginformersv1 "github.com/openshift/client-go/machineconfiguration/informers/externalversions/machineconfiguration/v1"
mcfglistersv1 "github.com/openshift/client-go/machineconfiguration/listers/machineconfiguration/v1"
corev1 "k8s.io/api/core/v1"
coreinformersv1 "k8s.io/client-go/informers/core/v1"
clientset "k8s.io/client-go/kubernetes"
Expand Down Expand Up @@ -98,6 +101,9 @@ type Controller struct {
nodeLister corelisterv1.NodeLister
nodeListerSynced cache.InformerSynced

mcpLister mcfglistersv1.MachineConfigPoolLister
mcpListerSynced cache.InformerSynced

queue workqueue.TypedRateLimitingInterface[string]
ongoingDrains map[string]time.Time

Expand All @@ -110,6 +116,7 @@ type Controller struct {
func New(
cfg Config,
nodeInformer coreinformersv1.NodeInformer,
mcpInformer mcfginformersv1.MachineConfigPoolInformer,
kubeClient clientset.Interface,
mcfgClient mcfgclientset.Interface,
fgAccessor featuregates.FeatureGateAccess,
Expand Down Expand Up @@ -140,6 +147,9 @@ func New(
ctrl.nodeLister = nodeInformer.Lister()
ctrl.nodeListerSynced = nodeInformer.Informer().HasSynced

ctrl.mcpLister = mcpInformer.Lister()
ctrl.mcpListerSynced = mcpInformer.Informer().HasSynced

return ctrl
}

Expand All @@ -159,7 +169,7 @@ func (ctrl *Controller) Run(workers int, stopCh <-chan struct{}) {
defer utilruntime.HandleCrash()
defer ctrl.queue.ShutDown()

if !cache.WaitForCacheSync(stopCh, ctrl.nodeListerSynced) {
if !cache.WaitForCacheSync(stopCh, ctrl.nodeListerSynced, ctrl.mcpListerSynced) {
return
}

Expand Down Expand Up @@ -305,6 +315,12 @@ func (ctrl *Controller) syncNode(key string) error {
Ctx: context.TODO(),
}

// Get MCP associated with node
pool, err := helpers.GetPrimaryPoolNameForMCN(ctrl.mcpLister, node)
if err != nil {
return err
}

desiredVerb := strings.Split(desiredState, "-")[0]
switch desiredVerb {
case daemonconsts.DrainerStateUncordon:
Expand All @@ -318,6 +334,7 @@ func (ctrl *Controller) syncNode(key string) error {
node,
ctrl.client,
ctrl.featureGatesAccessor,
pool,
)
if nErr != nil {
klog.Errorf("Error making MCN for Uncordon failure: %v", err)
Expand All @@ -333,6 +350,7 @@ func (ctrl *Controller) syncNode(key string) error {
node,
ctrl.client,
ctrl.featureGatesAccessor,
pool,
)
if err != nil {
klog.Errorf("Error making MCN for UnCordon success: %v", err)
Expand Down Expand Up @@ -387,6 +405,12 @@ func (ctrl *Controller) drainNode(node *corev1.Node, drainer *drain.Helper) erro
break
}

// Get MCP associated with node
pool, err := helpers.GetPrimaryPoolNameForMCN(ctrl.mcpLister, node)
if err != nil {
return err
}

if !isOngoingDrain {
ctrl.logNode(node, "cordoning")
// perform cordon
Expand All @@ -398,6 +422,7 @@ func (ctrl *Controller) drainNode(node *corev1.Node, drainer *drain.Helper) erro
node,
ctrl.client,
ctrl.featureGatesAccessor,
pool,
)
if Nerr != nil {
klog.Errorf("Error making MCN for Cordon Failure: %v", Nerr)
Expand All @@ -412,6 +437,7 @@ func (ctrl *Controller) drainNode(node *corev1.Node, drainer *drain.Helper) erro
node,
ctrl.client,
ctrl.featureGatesAccessor,
pool,
)
if err != nil {
klog.Errorf("Error making MCN for Cordon Success: %v", err)
Expand All @@ -420,14 +446,15 @@ func (ctrl *Controller) drainNode(node *corev1.Node, drainer *drain.Helper) erro

// Attempt drain
ctrl.logNode(node, "initiating drain")
err := upgrademonitor.GenerateAndApplyMachineConfigNodes(
err = upgrademonitor.GenerateAndApplyMachineConfigNodes(
&upgrademonitor.Condition{State: v1alpha1.MachineConfigNodeUpdateExecuted, Reason: string(v1alpha1.MachineConfigNodeUpdateDrained), Message: "Draining Node as part of update executed phase"},
&upgrademonitor.Condition{State: v1alpha1.MachineConfigNodeUpdateDrained, Reason: fmt.Sprintf("%s%s", string(v1alpha1.MachineConfigNodeUpdateExecuted), string(v1alpha1.MachineConfigNodeUpdateDrained)), Message: fmt.Sprintf("Draining node. The drain will not be complete until desired drainer %s matches current drainer %s", node.Annotations[daemonconsts.DesiredDrainerAnnotationKey], node.Annotations[daemonconsts.LastAppliedDrainerAnnotationKey])},
metav1.ConditionUnknown,
metav1.ConditionUnknown,
node,
ctrl.client,
ctrl.featureGatesAccessor,
pool,
)
if err != nil {
klog.Errorf("Error making MCN for Drain beginning: %v", err)
Expand Down Expand Up @@ -457,6 +484,7 @@ func (ctrl *Controller) drainNode(node *corev1.Node, drainer *drain.Helper) erro
node,
ctrl.client,
ctrl.featureGatesAccessor,
pool,
)
if nErr != nil {
klog.Errorf("Error making MCN for Drain failure: %v", nErr)
Expand All @@ -473,6 +501,7 @@ func (ctrl *Controller) drainNode(node *corev1.Node, drainer *drain.Helper) erro
node,
ctrl.client,
ctrl.featureGatesAccessor,
pool,
)
if err != nil {
klog.Errorf("Error making MCN for Drain success: %v", err)
Expand Down
30 changes: 27 additions & 3 deletions pkg/daemon/daemon.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,9 @@ import (
mcoResourceRead "github.com/openshift/machine-config-operator/lib/resourceread"
ctrlcommon "github.com/openshift/machine-config-operator/pkg/controller/common"
"github.com/openshift/machine-config-operator/pkg/daemon/constants"
"github.com/openshift/machine-config-operator/pkg/upgrademonitor"

"github.com/openshift/machine-config-operator/pkg/daemon/osrelease"
"github.com/openshift/machine-config-operator/pkg/helpers"
"github.com/openshift/machine-config-operator/pkg/upgrademonitor"
)

// Daemon is the dispatch point for the functions of the agent on the
Expand Down Expand Up @@ -100,6 +100,9 @@ type Daemon struct {
mcLister mcfglistersv1.MachineConfigLister
mcListerSynced cache.InformerSynced

mcpLister mcfglistersv1.MachineConfigPoolLister
mcpListerSynced cache.InformerSynced

ccLister mcfglistersv1.ControllerConfigLister
ccListerSynced cache.InformerSynced

Expand Down Expand Up @@ -362,6 +365,7 @@ func (dn *Daemon) ClusterConnect(
mcInformer mcfginformersv1.MachineConfigInformer,
nodeInformer coreinformersv1.NodeInformer,
ccInformer mcfginformersv1.ControllerConfigInformer,
mcpInformer mcfginformersv1.MachineConfigPoolInformer,
mcopClient mcopclientset.Interface,
kubeletHealthzEnabled bool,
kubeletHealthzEndpoint string,
Expand Down Expand Up @@ -396,6 +400,8 @@ func (dn *Daemon) ClusterConnect(
})
dn.ccLister = ccInformer.Lister()
dn.ccListerSynced = ccInformer.Informer().HasSynced
dn.mcpLister = mcpInformer.Lister()
dn.mcpListerSynced = mcpInformer.Informer().HasSynced

nw, err := newNodeWriter(dn.name, dn.stopCh)
if err != nil {
Expand Down Expand Up @@ -705,6 +711,12 @@ func (dn *Daemon) syncNode(key string) error {
return nil
}

// Get MCP associated with node
pool, err := helpers.GetPrimaryPoolNameForMCN(dn.mcpLister, node)
if err != nil {
return err
}

if node.Annotations[constants.MachineConfigDaemonPostConfigAction] == constants.MachineConfigDaemonStateRebooting {
klog.Info("Detected Rebooting Annotation, applying MCN.")
err := upgrademonitor.GenerateAndApplyMachineConfigNodes(
Expand All @@ -715,6 +727,7 @@ func (dn *Daemon) syncNode(key string) error {
node,
dn.mcfgClient,
dn.featureGatesAccessor,
pool,
)
if err != nil {
klog.Errorf("Error making MCN for Rebooted: %v", err)
Expand Down Expand Up @@ -790,6 +803,7 @@ func (dn *Daemon) syncNode(key string) error {
node,
dn.mcfgClient,
dn.featureGatesAccessor,
pool,
)
if err != nil {
klog.Errorf("Error making MCN for Resumed true: %v", err)
Expand Down Expand Up @@ -828,6 +842,7 @@ func (dn *Daemon) syncNode(key string) error {
dn.node,
dn.mcfgClient,
dn.featureGatesAccessor,
pool,
)
if err != nil {
klog.Errorf("Error making MCN for Updated false: %v", err)
Expand All @@ -852,6 +867,7 @@ func (dn *Daemon) syncNode(key string) error {
dn.node,
dn.mcfgClient,
dn.featureGatesAccessor,
pool,
)
if err != nil {
klog.Errorf("Error making MCN for Updated: %v", err)
Expand Down Expand Up @@ -1379,7 +1395,7 @@ func (dn *Daemon) Run(stopCh <-chan struct{}, exitCh <-chan error, errCh chan er
defer dn.ccQueue.ShutDown()
defer dn.preserveDaemonLogs()

if !cache.WaitForCacheSync(stopCh, dn.nodeListerSynced, dn.mcListerSynced, dn.ccListerSynced) {
if !cache.WaitForCacheSync(stopCh, dn.nodeListerSynced, dn.mcListerSynced, dn.ccListerSynced, dn.mcpListerSynced) {
return fmt.Errorf("failed to sync initial listers cache")
}

Expand Down Expand Up @@ -2300,6 +2316,13 @@ func (dn *Daemon) updateConfigAndState(state *stateAndConfigs) (bool, bool, erro
if inDesiredConfig {
// Great, we've successfully rebooted for the desired config,
// let's mark it done!

// Get MCP associated with node
pool, err := helpers.GetPrimaryPoolNameForMCN(dn.mcpLister, dn.node)
if err != nil {
return missingODC, inDesiredConfig, err
}

err = upgrademonitor.GenerateAndApplyMachineConfigNodes(
&upgrademonitor.Condition{State: mcfgalphav1.MachineConfigNodeResumed, Reason: string(mcfgalphav1.MachineConfigNodeResumed), Message: fmt.Sprintf("In desired config %s. Resumed normal operations. Applying proper annotations.", state.currentConfig.Name)},
nil,
Expand All @@ -2308,6 +2331,7 @@ func (dn *Daemon) updateConfigAndState(state *stateAndConfigs) (bool, bool, erro
dn.node,
dn.mcfgClient,
dn.featureGatesAccessor,
pool,
)
if err != nil {
klog.Errorf("Error making MCN for Resumed true: %v", err)
Expand Down
2 changes: 2 additions & 0 deletions pkg/daemon/daemon_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,7 @@ func (f *fixture) newController() *Daemon {
i.Machineconfiguration().V1().MachineConfigs(),
k8sI.Core().V1().Nodes(),
i.Machineconfiguration().V1().ControllerConfigs(),
i.Machineconfiguration().V1().MachineConfigPools(),
f.oclient,
false,
"",
Expand All @@ -169,6 +170,7 @@ func (f *fixture) newController() *Daemon {

d.mcListerSynced = alwaysReady
d.nodeListerSynced = alwaysReady
d.mcpListerSynced = alwaysReady

stopCh := make(chan struct{})
defer close(stopCh)
Expand Down
12 changes: 11 additions & 1 deletion pkg/daemon/drain.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ import (
"github.com/openshift/machine-config-operator/pkg/apihelpers"
ctrlcommon "github.com/openshift/machine-config-operator/pkg/controller/common"
"github.com/openshift/machine-config-operator/pkg/daemon/constants"

"github.com/openshift/machine-config-operator/pkg/helpers"
"github.com/openshift/machine-config-operator/pkg/upgrademonitor"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
Expand All @@ -39,14 +41,22 @@ func (dn *Daemon) performDrain() error {

if !dn.drainRequired() {
logSystem("Drain not required, skipping")
err := upgrademonitor.GenerateAndApplyMachineConfigNodes(

// Get MCP associated with node
pool, err := helpers.GetPrimaryPoolNameForMCN(dn.mcpLister, dn.node)
if err != nil {
return err
}

err = upgrademonitor.GenerateAndApplyMachineConfigNodes(
&upgrademonitor.Condition{State: mcfgalphav1.MachineConfigNodeUpdateExecuted, Reason: string(mcfgalphav1.MachineConfigNodeUpdateDrained), Message: "Node Drain Not required for this update."},
&upgrademonitor.Condition{State: mcfgalphav1.MachineConfigNodeUpdateDrained, Reason: fmt.Sprintf("%s%s", string(mcfgalphav1.MachineConfigNodeUpdateExecuted), string(mcfgalphav1.MachineConfigNodeUpdateDrained)), Message: "Node Drain Not required for this update."},
metav1.ConditionUnknown,
metav1.ConditionFalse,
dn.node,
dn.mcfgClient,
dn.featureGatesAccessor,
pool,
)
if err != nil {
klog.Errorf("Error making MCN for Drain not required: %v", err)
Expand Down
22 changes: 22 additions & 0 deletions pkg/daemon/pinned_image_set.go
Original file line number Diff line number Diff line change
Expand Up @@ -544,6 +544,12 @@ func (p *PinnedImageSetManager) updateStatusProgressing(pools []*mcfgv1.MachineC
}
imageSetSpec := getPinnedImageSetSpecForPools(pools)

// Get MCP associated with node
pool, err := helpers.GetPrimaryPoolNameForMCN(p.mcpLister, node)
if err != nil {
return err
}

return upgrademonitor.UpdateMachineConfigNodeStatus(
&upgrademonitor.Condition{
State: mcfgv1alpha1.MachineConfigNodePinnedImageSetsProgressing,
Expand All @@ -558,6 +564,7 @@ func (p *PinnedImageSetManager) updateStatusProgressing(pools []*mcfgv1.MachineC
applyCfg,
imageSetSpec,
p.featureGatesAccessor,
pool,
)
}

Expand All @@ -574,6 +581,12 @@ func (p *PinnedImageSetManager) updateStatusProgressingComplete(pools []*mcfgv1.
}
imageSetSpec := getPinnedImageSetSpecForPools(pools)

// Get MCP associated with node
pool, err := helpers.GetPrimaryPoolNameForMCN(p.mcpLister, node)
if err != nil {
return err
}

err = upgrademonitor.UpdateMachineConfigNodeStatus(
&upgrademonitor.Condition{
State: mcfgv1alpha1.MachineConfigNodePinnedImageSetsProgressing,
Expand All @@ -588,6 +601,7 @@ func (p *PinnedImageSetManager) updateStatusProgressingComplete(pools []*mcfgv1.
applyCfg,
imageSetSpec,
p.featureGatesAccessor,
pool,
)
if err != nil {
klog.Errorf("Failed to updated machine config node: %v", err)
Expand All @@ -608,6 +622,7 @@ func (p *PinnedImageSetManager) updateStatusProgressingComplete(pools []*mcfgv1.
nil,
nil,
p.featureGatesAccessor,
pool,
)
}

Expand All @@ -632,6 +647,12 @@ func (p *PinnedImageSetManager) updateStatusError(pools []*mcfgv1.MachineConfigP
errMsg = statusErr.Error()
}

// Get MCP associated with node
pool, err := helpers.GetPrimaryPoolNameForMCN(p.mcpLister, node)
if err != nil {
return err
}

return upgrademonitor.UpdateMachineConfigNodeStatus(
&upgrademonitor.Condition{
State: mcfgv1alpha1.MachineConfigNodePinnedImageSetsDegraded,
Expand All @@ -646,6 +667,7 @@ func (p *PinnedImageSetManager) updateStatusError(pools []*mcfgv1.MachineConfigP
applyCfg,
imageSetSpec,
p.featureGatesAccessor,
pool,
)
}

Expand Down
Loading