Skip to content

Commit d8ddd80

Browse files
mco-1598: validate MCN on node creation and deletion
1 parent e23bc51 commit d8ddd80

File tree

4 files changed

+392
-2
lines changed

4 files changed

+392
-2
lines changed

test/extended/machine_config/helpers.go

+307
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import (
66
"errors"
77
"fmt"
88
"math/rand"
9+
"strings"
910
"time"
1011

1112
osconfigv1 "github.com/openshift/api/config/v1"
@@ -581,3 +582,309 @@ func restoreDesiredConfig(oc *exutil.CLI, node corev1.Node) error {
581582
configErr := oc.Run("patch").Args(fmt.Sprintf("node/%v", node.Name), "--patch", fmt.Sprintf(`{"metadata":{"annotations":{"machineconfiguration.openshift.io/desiredConfig":"%v"}}}`, currentConfig), "--type=merge").Execute()
582583
return configErr
583584
}
585+
586+
// `workersCanBeScaled` checks whether the worker nodes in a cluster can be scaled.
587+
// Cases where scaling worker nodes is not possible include:
588+
// - Baremetal platform
589+
// - MachineAPI is disabled
590+
// - Error getting list of MachineSets / no MachineSets exist
591+
// - All MachineSets have 0 worker nodes
592+
func workersCanBeScaled(oc *exutil.CLI, machineClient *machineclient.Clientset) (bool, error) {
593+
framework.Logf("Checking if worker nodes can be scaled using machinesets.")
594+
595+
// Check if platform is baremetal
596+
framework.Logf("Checking if cluster platform is baremetal.")
597+
if checkPlatform(oc) == "baremetal" {
598+
framework.Logf("Cluster platform is baremetal. Nodes cannot be scaled in baremetal test environments.")
599+
return false, nil
600+
}
601+
602+
// Check if MachineAPI is enabled
603+
framework.Logf("Checking if MachineAPI is enabled.")
604+
if !isCapabilityEnabled(oc, "MachineAPI") {
605+
framework.Logf("MachineAPI capability is not enabled. Nodes cannot be scaled.")
606+
return false, nil
607+
}
608+
609+
// Get MachineSets
610+
framework.Logf("Getting MachineSets.")
611+
machineSets, machineSetErr := machineClient.MachineV1beta1().MachineSets("openshift-machine-api").List(context.TODO(), metav1.ListOptions{})
612+
if machineSetErr != nil {
613+
framework.Logf("Error getting list of MachineSets.")
614+
return false, machineSetErr
615+
} else if len(machineSets.Items) == 0 {
616+
framework.Logf("No MachineSets configured. Nodes cannot be scaled.")
617+
return false, nil
618+
}
619+
620+
// Check if all MachineSets have 0 replicas
621+
// Per openshift-tests-private repo:
622+
// "In some UPI/SNO/Compact clusters machineset resources exist, but they are all configured with 0 replicas
623+
// If all machinesets have 0 replicas, then it means that we need to skip the test case"
624+
machineSetsWithReplicas := 0
625+
for _, machineSet := range machineSets.Items {
626+
replicas := machineSet.Spec.Replicas
627+
machineSetsWithReplicas += int(*replicas)
628+
}
629+
if machineSetsWithReplicas == 0 {
630+
framework.Logf("All machinesets have 0 worker nodes. Nodes cannot be scaled.")
631+
return false, nil
632+
}
633+
634+
return true, nil
635+
}
636+
637+
// `checkPlatform` returns the cluster's platform
638+
func checkPlatform(oc *exutil.CLI) string {
639+
output, err := oc.AsAdmin().Run("get").Args("infrastructure", "cluster", "-o=jsonpath={.status.platformStatus.type}").Output()
640+
o.Expect(err).NotTo(o.HaveOccurred(), "Failed determining cluster infrastructure.")
641+
return strings.ToLower(output)
642+
}
643+
644+
// `isCapabilityEnabled` checks whether a capability is in the cluster's enabledCapabilities list
645+
func isCapabilityEnabled(oc *exutil.CLI, desiredCapability osconfigv1.ClusterVersionCapability) bool {
646+
enabledCapabilities := getEnabledCapabilities(oc)
647+
enabled := false
648+
for _, enabledCapability := range enabledCapabilities {
649+
if enabledCapability == desiredCapability {
650+
enabled = true
651+
break
652+
}
653+
}
654+
framework.Logf("Capability [%s] is enabled: %v", desiredCapability, enabled)
655+
656+
return enabled
657+
}
658+
659+
// `getEnabledCapabilities` gets a cluster's enabled capability list
660+
func getEnabledCapabilities(oc *exutil.CLI) []osconfigv1.ClusterVersionCapability {
661+
clusterversion, err := oc.AsAdmin().AdminConfigClient().ConfigV1().ClusterVersions().Get(context.TODO(), "version", metav1.GetOptions{})
662+
o.Expect(err).NotTo(o.HaveOccurred(), "Error getting clusterverion.")
663+
enabledCapabilities := clusterversion.Status.Capabilities.EnabledCapabilities
664+
665+
return enabledCapabilities
666+
}
667+
668+
// `ScaleMachineSet` scales the provided MachineSet by updating the replica to be the provided value
669+
func ScaleMachineSet(oc *exutil.CLI, machineSetName string, replicaValue string) error {
670+
return oc.Run("scale").Args(fmt.Sprintf("--replicas=%v", replicaValue), "machinesets.machine.openshift.io", machineSetName, "-n", "openshift-machine-api").Execute()
671+
}
672+
673+
// GetMachinesByPhase get machine by phase e.g. Running, Provisioning, Provisioned, Deleting etc.
674+
func GetMachinesByPhase(machineClient *machineclient.Clientset, machineSetName string, desiredPhase string) (machinev1beta1.Machine, error) {
675+
desiredMachine := machinev1beta1.Machine{}
676+
err := fmt.Errorf("no %v machine found in %v MachineSet", desiredPhase, machineSetName)
677+
o.Eventually(func() bool {
678+
framework.Logf("Trying to get machine with phase %v from MachineSet %v.", desiredPhase, machineSetName)
679+
680+
// Get machines in desired MachineSet
681+
machines, machinesErr := machineClient.MachineV1beta1().Machines(mapiNamespace).List(context.Background(), metav1.ListOptions{LabelSelector: fmt.Sprintf("machine.openshift.io/cluster-api-machineset=%v", machineSetName)})
682+
o.Expect(machinesErr).NotTo(o.HaveOccurred())
683+
684+
// Find machine in desired phase
685+
for _, machine := range machines.Items {
686+
machinePhase := ptr.Deref(machine.Status.Phase, "")
687+
if machinePhase == desiredPhase {
688+
desiredMachine = machine
689+
err = nil
690+
return true
691+
}
692+
}
693+
return false
694+
}, 1*time.Minute, 3*time.Second).Should(o.BeTrue())
695+
return desiredMachine, err
696+
}
697+
698+
// `WaitForMachineInState` waits for the desired machine to be in the desired state
699+
func WaitForMachineInState(machineClient *machineclient.Clientset, machineName string, desiredPhase string) error {
700+
o.Eventually(func() bool {
701+
// Get the desired machine
702+
machine, machineErr := machineClient.MachineV1beta1().Machines(mapiNamespace).Get(context.TODO(), machineName, metav1.GetOptions{})
703+
o.Expect(machineErr).NotTo(o.HaveOccurred())
704+
705+
// Check if machine phase is desired phase
706+
machinePhase := ptr.Deref(machine.Status.Phase, "")
707+
framework.Logf("Machine %v is in %v phase.", machineName, machinePhase)
708+
return machinePhase == desiredPhase
709+
}, 7*time.Minute, 10*time.Second).Should(o.BeTrue())
710+
return nil
711+
}
712+
713+
// `getNodeInMachine` gets the node associated with a machine
714+
func getNodeInMachine(oc *exutil.CLI, machineName string) (corev1.Node, error) {
715+
// Get name of nodes associated with the desired machine
716+
nodeNames, nodeNamesErr := oc.Run("get").Args("nodes", "-o", fmt.Sprintf(`jsonpath='{.items[?(@.metadata.annotations.machine\.openshift\.io/machine=="openshift-machine-api/%v")].metadata.name}'`, machineName)).Output()
717+
if nodeNamesErr != nil { //error getting filtered node names
718+
return corev1.Node{}, nodeNamesErr
719+
} else if nodeNames == "" { //error when no nodes are found
720+
return corev1.Node{}, fmt.Errorf("no node is linked to Machine: %s", machineName)
721+
}
722+
723+
// Determine the number of nodes in the Machine
724+
// Note: the format of `nodeNames` is the names of nodes seperated by a space (ex: "node-name-1 node-name-2"),
725+
// so the number of nodes is equal to one more than the number of spaces
726+
numberOfNodeNames := strings.Count(nodeNames, " ") + 1
727+
if numberOfNodeNames > 1 { //error when a machine has more than one node
728+
return corev1.Node{}, fmt.Errorf("more than one node is linked to Machine: %s; number of nodes: %d", machineName, numberOfNodeNames)
729+
}
730+
731+
node, nodeErr := oc.AsAdmin().KubeClient().CoreV1().Nodes().Get(context.TODO(), strings.ReplaceAll(nodeNames, "'", ""), metav1.GetOptions{})
732+
if nodeErr != nil { //error getting filtered node names
733+
return corev1.Node{}, nodeErr
734+
}
735+
736+
return *node, nil
737+
}
738+
739+
// `getNewReadyNodeInMachine` waits for the newly provisioned node in a desired machine node to be ready
740+
func getNewReadyNodeInMachine(oc *exutil.CLI, machineName string) (corev1.Node, error) {
741+
desiredNode := corev1.Node{}
742+
err := fmt.Errorf("no ready node in Machine: %s", machineName)
743+
o.Eventually(func() bool {
744+
// Get the desired node
745+
node, nodeErr := getNodeInMachine(oc, machineName)
746+
o.Expect(nodeErr).NotTo(o.HaveOccurred())
747+
748+
// Check if node is in desiredStatus
749+
framework.Logf("Checking if node %v is ready.", node.Name)
750+
if isNodeReady(node) {
751+
desiredNode = node
752+
err = nil
753+
return true
754+
}
755+
756+
return false
757+
}, 2*time.Minute, 3*time.Second).Should(o.BeTrue())
758+
return desiredNode, err
759+
}
760+
761+
// `WaitForValidMCNProperties` waits for the MCN of a node to be valid. To be valid, the following must be true:
762+
// - MCN with name equivalent to node name exists
763+
// - Pool name in MCN spec matches node MCP association
764+
// - Desired config version of node matches desired config version in MCN spec
765+
// - Current config version of node matches current config version in MCN status
766+
// - Desired config version of node matches desired config version in MCN status
767+
func WaitForValidMCNProperties(clientSet *machineconfigclient.Clientset, node corev1.Node) error {
768+
nodeDesiredConfig := node.Annotations["machineconfiguration.openshift.io/desiredConfig"]
769+
nodeCurrentConfig := node.Annotations["machineconfiguration.openshift.io/currentConfig"]
770+
771+
// Check MCN exists and that its name and node name match
772+
framework.Logf("Checking MCN exists and name matches node name.")
773+
o.Eventually(func() bool {
774+
// Get the desired MCN
775+
newMCN, newMCNErr := clientSet.MachineconfigurationV1alpha1().MachineConfigNodes().Get(context.TODO(), node.Name, metav1.GetOptions{})
776+
if newMCNErr != nil {
777+
framework.Logf("Failed getting MCN %v", node.Name)
778+
return false
779+
}
780+
781+
// Check if MCN name matches node's name
782+
framework.Logf("Node name: %v. MCN name: %v.", node.Name, newMCN.Name)
783+
return node.Name == newMCN.Name
784+
}, 20*time.Second, 2*time.Second).Should(o.BeTrue(), fmt.Sprintf("Could not get MCN for node %v", node.Name))
785+
786+
// Check pool name in MCN matches node MCP association
787+
// Note: pool name should be default value of `worker`
788+
framework.Logf("Waiting for node MCP to match pool name in MCN %v spec.", node.Name)
789+
nodeMCP := ""
790+
var ok bool
791+
if _, ok = node.Labels["node-role.kubernetes.io/worker"]; ok {
792+
nodeMCP = "worker"
793+
} else {
794+
return fmt.Errorf("node MCP association could be determined for node %v; node is not in default worker pool", node.Name)
795+
}
796+
o.Eventually(func() bool {
797+
// Get the desired MCN
798+
newMCN, newMCNErr := clientSet.MachineconfigurationV1alpha1().MachineConfigNodes().Get(context.TODO(), node.Name, metav1.GetOptions{})
799+
if newMCNErr != nil {
800+
framework.Logf("Failed getting MCN %v", node.Name)
801+
return false
802+
}
803+
804+
// Check if MCN pool name in spec matches node's MCP association
805+
framework.Logf("Node MCP association: %v. MCN spec pool name: %v.", nodeMCP, newMCN.Spec.Pool.Name)
806+
return newMCN.Spec.Pool.Name == nodeMCP
807+
}, 1*time.Minute, 5*time.Second).Should(o.BeTrue())
808+
809+
// Check desired config version matches for node and MCN spec config version
810+
framework.Logf("Waiting for node desired config version to match desired config version in MCN %v spec.", node.Name)
811+
o.Eventually(func() bool {
812+
// Get the desired MCN
813+
newMCN, newMCNErr := clientSet.MachineconfigurationV1alpha1().MachineConfigNodes().Get(context.TODO(), node.Name, metav1.GetOptions{})
814+
if newMCNErr != nil {
815+
framework.Logf("Failed getting MCN %v", node.Name)
816+
return false
817+
}
818+
819+
// Check if MCN desired config version in spec matches node's desired config version
820+
framework.Logf("Node desired config version: %v. MCN spec desired config version: %v.", nodeDesiredConfig, newMCN.Spec.ConfigVersion.Desired)
821+
return newMCN.Spec.ConfigVersion.Desired == nodeDesiredConfig
822+
}, 1*time.Minute, 5*time.Second).Should(o.BeTrue())
823+
824+
// Check current config version matches for node and MCN status config version
825+
framework.Logf("Waiting for node current config version to match current config version in MCN %v status.", node.Name)
826+
o.Eventually(func() bool {
827+
// Get the desired MCN
828+
newMCN, newMCNErr := clientSet.MachineconfigurationV1alpha1().MachineConfigNodes().Get(context.TODO(), node.Name, metav1.GetOptions{})
829+
if newMCNErr != nil {
830+
framework.Logf("Failed getting MCN %v", node.Name)
831+
return false
832+
}
833+
834+
// Check if MCN current config version in status matches node's current config version
835+
framework.Logf("Node current config version: %v. MCN status current config version: %v.", nodeCurrentConfig, newMCN.Status.ConfigVersion.Current)
836+
return newMCN.Status.ConfigVersion.Current == nodeCurrentConfig
837+
}, 2*time.Minute, 5*time.Second).Should(o.BeTrue())
838+
839+
// Check desired config version matches for node and MCN status config version
840+
framework.Logf("Waiting for node desired config version to match desired config version in MCN %v status.", node.Name)
841+
o.Eventually(func() bool {
842+
// Get the desired MCN
843+
newMCN, newMCNErr := clientSet.MachineconfigurationV1alpha1().MachineConfigNodes().Get(context.TODO(), node.Name, metav1.GetOptions{})
844+
if newMCNErr != nil {
845+
framework.Logf("Failed getting MCN %v", node.Name)
846+
return false
847+
}
848+
849+
// Check if MCN desired config version in status matches node's desired config version
850+
framework.Logf("Node desired config version: %v. MCN status desired config version: %v.", nodeDesiredConfig, newMCN.Status.ConfigVersion.Desired)
851+
return newMCN.Status.ConfigVersion.Desired == nodeDesiredConfig
852+
}, 2*time.Minute, 5*time.Second).Should(o.BeTrue())
853+
return nil
854+
}
855+
856+
// `WaitForNodeToBeDeleted` waits for a node to no longer exist
857+
func WaitForNodeToBeDeleted(oc *exutil.CLI, nodeName string) error {
858+
o.Eventually(func() bool {
859+
framework.Logf("Check if node %v is deleted.", nodeName)
860+
861+
// Check if node still exists
862+
node, _ := oc.AsAdmin().KubeClient().CoreV1().Nodes().Get(context.TODO(), nodeName, metav1.GetOptions{})
863+
if node == nil {
864+
framework.Logf("Node %v has been deleted.", nodeName)
865+
return true
866+
}
867+
868+
framework.Logf("Node %v still exists.", nodeName)
869+
return false
870+
}, 7*time.Minute, 3*time.Second).Should(o.BeTrue())
871+
return nil
872+
}
873+
874+
// `WaitForMCNToBeDeleted` waits for a MCN to no longer exist
875+
func WaitForMCNToBeDeleted(clientSet *machineconfigclient.Clientset, mcnName string) error {
876+
o.Eventually(func() bool {
877+
framework.Logf("Check if MCN %v is deleted.", mcnName)
878+
879+
// Check if MCN still exists
880+
mcn, _ := clientSet.MachineconfigurationV1alpha1().MachineConfigNodes().Get(context.TODO(), mcnName, metav1.GetOptions{})
881+
if mcn == nil {
882+
framework.Logf("MCN %v has been deleted.", mcnName)
883+
return true
884+
}
885+
886+
framework.Logf("MCN %v still exists.", mcnName)
887+
return false
888+
}, 4*time.Minute, 3*time.Second).Should(o.BeTrue())
889+
return nil
890+
}

0 commit comments

Comments
 (0)