Commit e23bc51

mco-1597: Validate MCN condition status on node degrade
1 parent 463576e commit e23bc51

7 files changed: +194 -15 lines

test/extended/machine_config/helpers.go

+83 -10

@@ -3,6 +3,7 @@ package machine_config
 import (
 	"context"
 	"encoding/json"
+	"errors"
 	"fmt"
 	"math/rand"
 	"time"
@@ -39,6 +40,7 @@ const (
 )

 // TODO: add error message returns for `.NotTo(o.HaveOccurred())` cases.
+// TODO: fix capitalization of helper funcs

 // skipUnlessTargetPlatform skips the test if it is running on the target platform
 func skipUnlessTargetPlatform(oc *exutil.CLI, platformType osconfigv1.PlatformType) {
@@ -401,7 +403,7 @@ func WaitForMCPToBeReady(oc *exutil.CLI, machineConfigClient *machineconfigclien
 	return nil
 }

-// GetCordonedNodes get cordoned nodes (if maxUnavailable > 1 ) otherwise return the 1st cordoned node
+// `GetCordonedNodes` gets the cordoned nodes (if maxUnavailable > 1); otherwise it returns the first cordoned node
 func GetCordonedNodes(oc *exutil.CLI, mcpName string) []corev1.Node {
 	// Wait for the MCP to start updating
 	o.Expect(waitForMCPConditionStatus(oc, mcpName, "Updating", "True")).NotTo(o.HaveOccurred(), "Waiting for 'Updating' status change failed.")
@@ -413,6 +415,7 @@ func GetCordonedNodes(oc *exutil.CLI, mcpName string) []corev1.Node {
 	o.Expect(nodeErr).NotTo(o.HaveOccurred(), "Error getting nodes from %v MCP.", mcpName)
 	o.Expect(nodes).ShouldNot(o.BeEmpty(), "No nodes found for %v MCP.", mcpName)

+	// TODO: cleanup
 	for _, node := range nodes {
 		unschedulable := node.Spec.Unschedulable
 		if unschedulable {
@@ -451,27 +454,30 @@ func waitForMCPConditionStatus(oc *exutil.CLI, mcpName string, conditionType mcf

 		framework.Logf("Waiting for %v MCP's %v condition to be %v.", mcp.Name, conditionType, status)
 		return false
-	}, 5*time.Minute, 3*time.Second).Should(o.BeTrue())
+	}, 2*time.Minute, 3*time.Second).Should(o.BeTrue())
 	return nil
 }

 // `waitForMCNConditionStatus` waits until the desired MCN condition matches the desired status (ex. wait until "Updated" is "False")
 func waitForMCNConditionStatus(clientSet *machineconfigclient.Clientset, mcnName string, conditionType string, status metav1.ConditionStatus, timeout time.Duration, interval time.Duration) error {
 	o.Eventually(func() bool {
-		// Get MCN & desried condition status
+		framework.Logf("Waiting for MCN %v %v condition to be %v.", mcnName, conditionType, status)
+
+		// Get MCN & check if the MCN condition status matches the desired status
 		workerNodeMCN, workerErr := clientSet.MachineconfigurationV1alpha1().MachineConfigNodes().Get(context.TODO(), mcnName, metav1.GetOptions{})
 		o.Expect(workerErr).NotTo(o.HaveOccurred())
-		conditionStatus := getMCNConditionStatus(workerNodeMCN, conditionType)
-		if conditionStatus == status {
-			return true
-		}
-
-		framework.Logf("Waiting for MCN %v %v condition to be %v.", mcnName, conditionType, status)
-		return false
+		return checkMCNConditionStatus(workerNodeMCN, conditionType, status)
 	}, timeout, interval).Should(o.BeTrue())
 	return nil
 }

+// `checkMCNConditionStatus` checks that an MCN condition matches the desired status (ex. confirm "Updated" is "False")
+func checkMCNConditionStatus(mcn *v1alpha1.MachineConfigNode, conditionType string, status metav1.ConditionStatus) bool {
+	conditionStatus := getMCNConditionStatus(mcn, conditionType)
+	framework.Logf("MCN %v %v condition is %v.", mcn.Name, conditionType, conditionStatus)
+	return conditionStatus == status
+}
+
 // `getMCNConditionStatus` returns the status of the desired condition type for MCN, or an empty string if the condition does not exist
 func getMCNConditionStatus(mcn *v1alpha1.MachineConfigNode, conditionType string) metav1.ConditionStatus {
 	// Loop through conditions and return the status of the desired condition type
@@ -508,3 +514,70 @@ func confirmUpdatedMCNStatus(clientSet *machineconfigclient.Clientset, mcnName s
 	framework.Logf("Node %s update is complete and MCN is valid.", mcnName)
 	return true
 }
+
+// TODO: consolidate with similar functions
+func GetDegradedNode(oc *exutil.CLI, mcpName string) (corev1.Node, error) {
+	// Get nodes in desired pool
+	nodes, nodeErr := GetNodesByRole(oc, mcpName)
+	o.Expect(nodeErr).NotTo(o.HaveOccurred())
+	o.Expect(nodes).ShouldNot(o.BeEmpty())
+
+	// Get degraded node
+	for _, node := range nodes {
+		// TODO: create generalized get node state helper
+		state := node.Annotations["machineconfiguration.openshift.io/state"]
+		if state == "Degraded" {
+			return node, nil
+		}
+	}
+
+	return corev1.Node{}, errors.New("no degraded node found")
+}
+
+// `recoverFromDegraded` updates the current and desired machine configs so that the pool can recover from the degraded state once the offending MC is deleted
+func recoverFromDegraded(oc *exutil.CLI, mcpName string) error {
+	framework.Logf("Recovering %s pool from degraded state", mcpName)
+
+	// Get nodes from degraded MCP & update the desired config of the degraded node to force a recovery update
+	nodes, nodeErr := GetNodesByRole(oc, mcpName)
+	o.Expect(nodeErr).NotTo(o.HaveOccurred())
+	o.Expect(nodes).ShouldNot(o.BeEmpty())
+	for _, node := range nodes {
+		framework.Logf("Restoring desired config for node: %s", node.Name)
+		state := node.Annotations["machineconfiguration.openshift.io/state"]
+		if state == "Done" {
+			framework.Logf("Node %s is updated and does not need to be recovered", node.Name)
+		} else {
+			err := restoreDesiredConfig(oc, node)
+			if err != nil {
+				return fmt.Errorf("error restoring desired config in node %s. Error: %s", node.Name, err)
+			}
+		}
+	}
+
+	// Wait for the MCP to leave the degraded status
+	mcpErr := waitForMCPConditionStatus(oc, mcpName, "Degraded", "False")
+	o.Expect(mcpErr).NotTo(o.HaveOccurred(), fmt.Sprintf("could not recover %v MCP from the degraded status.", mcpName))
+	mcpErr = waitForMCPConditionStatus(oc, mcpName, "Updated", "True")
+	o.Expect(mcpErr).NotTo(o.HaveOccurred(), fmt.Sprintf("%v MCP could not reach an updated state.", mcpName))
+	return nil
+}
+
+// TODO: generalize with get node status to just pass in the general node annotation label
+func getCurrentMachineConfig(node corev1.Node) string {
+	return node.Annotations["machineconfiguration.openshift.io/currentConfig"]
+}
+
+// `restoreDesiredConfig` updates the value of a node's desiredConfig annotation to be equal to the value of its currentConfig (desiredConfig=currentConfig)
+func restoreDesiredConfig(oc *exutil.CLI, node corev1.Node) error {
+	// Get current config
+	currentConfig := getCurrentMachineConfig(node)
+	if currentConfig == "" {
+		return fmt.Errorf("currentConfig annotation is empty for node %s", node.Name)
+	}

+	// Update desired config to be equal to current config
+	framework.Logf("Node: %s is restoring desiredConfig value to match currentConfig value: %s", node.Name, currentConfig)
+	configErr := oc.Run("patch").Args(fmt.Sprintf("node/%v", node.Name), "--patch", fmt.Sprintf(`{"metadata":{"annotations":{"machineconfiguration.openshift.io/desiredConfig":"%v"}}}`, currentConfig), "--type=merge").Execute()
+	return configErr
+}
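
Several TODOs in this helper file ("create generalized get node state helper", "generalize with get node status to just pass in the general node annotation label") point at the same consolidation: each helper reads a machineconfiguration.openshift.io annotation off the node by a raw string key. A minimal sketch of what such a shared helper could look like, assuming it lives in the same package and reuses the existing corev1 import; the name getNodeAnnotation and the constant names are hypothetical and not part of this commit:

	// Hypothetical consolidation of the annotation lookups used by GetDegradedNode,
	// getCurrentMachineConfig, and recoverFromDegraded; names are illustrative only.
	const (
		stateAnnotation         = "machineconfiguration.openshift.io/state"
		currentConfigAnnotation = "machineconfiguration.openshift.io/currentConfig"
		desiredConfigAnnotation = "machineconfiguration.openshift.io/desiredConfig"
	)

	// getNodeAnnotation returns the value of the given annotation on a node,
	// or an empty string if the annotation is not set.
	func getNodeAnnotation(node corev1.Node, key string) string {
		return node.Annotations[key]
	}

With a helper like this, getCurrentMachineConfig(node) would reduce to getNodeAnnotation(node, currentConfigAnnotation), and the state checks in GetDegradedNode and recoverFromDegraded would share stateAnnotation instead of repeating the raw key.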

test/extended/machine_config/machine_config_node.go

+46 -2

@@ -31,6 +31,7 @@ var _ = g.Describe("[sig-mco][OCPFeatureGate:MachineConfigNode][Serial]", func()
 		MCOMachineConfigBaseDir = exutil.FixturePath("testdata", "machine_config", "machineconfig")
 		infraMCPFixture         = filepath.Join(MCOMachineConfigPoolBaseDir, "infra-mcp.yaml")
 		testFileMCFixture       = filepath.Join(MCOMachineConfigBaseDir, "0-worker-mc.yaml")
+		invalidMCFixture        = filepath.Join(MCOMachineConfigBaseDir, "1-worker-invalid-mc.yaml")
 		oc                      = exutil.NewCLIWithoutNamespace("machine-config")
 	)

@@ -41,6 +42,10 @@ var _ = g.Describe("[sig-mco][OCPFeatureGate:MachineConfigNode][Serial]", func()
 	g.It("Should properly transition through MCN conditions on node update [apigroup:machineconfiguration.openshift.io]", func() {
 		ValidateMCNConditionTransitions(oc, testFileMCFixture)
 	})
+
+	g.It("Should properly report MCN conditions on node degrade [apigroup:machineconfiguration.openshift.io]", func() {
+		ValidateMCNConditionOnNodeDegrade(oc, invalidMCFixture)
+	})
 })

 // `ValidateMCNProperties` checks that MCN properties match the corresponding node properties
@@ -158,7 +163,7 @@ func ValidateMCNConditionTransitions(oc *exutil.CLI, fixture string) {

 	// Delete MC on failure or test completion
 	defer func() {
-		deleteMCErr := oc.Run("delete").Args("machineconfig", "99-worker-testfile").Execute()
+		deleteMCErr := oc.Run("delete").Args("machineconfig", "90-worker-testfile").Execute()
 		o.Expect(deleteMCErr).NotTo(o.HaveOccurred())
 	}()

@@ -168,6 +173,7 @@ func ValidateMCNConditionTransitions(oc *exutil.CLI, fixture string) {
 	workerNode := updatingNodes[0]

 	// Validate transition through conditions for MCN
+	// TODO: make consts for the statuses
 	framework.Logf("Checking Updated=False")
 	err := waitForMCNConditionStatus(clientSet, workerNode.Name, "Updated", "False", 1*time.Minute, 5*time.Second)
 	o.Expect(err).NotTo(o.HaveOccurred())
@@ -183,7 +189,6 @@ func ValidateMCNConditionTransitions(oc *exutil.CLI, fixture string) {
 	framework.Logf("Checking Drained=Unknown")
 	err = waitForMCNConditionStatus(clientSet, workerNode.Name, "Drained", "Unknown", 30*time.Second, 2*time.Second)
 	o.Expect(err).NotTo(o.HaveOccurred())
-	// Failing here!
 	framework.Logf("Checking Drained=True")
 	err = waitForMCNConditionStatus(clientSet, workerNode.Name, "Drained", "True", 7*time.Minute, 10*time.Second)
 	o.Expect(err).NotTo(o.HaveOccurred())
@@ -222,3 +227,42 @@ func ValidateMCNConditionTransitions(oc *exutil.CLI, fixture string) {
 	framework.Logf("Checking all conditions other than 'Updated' are False.")
 	o.Expect(confirmUpdatedMCNStatus(clientSet, workerNode.Name)).Should(o.BeTrue())
 }
+
+// `ValidateMCNConditionOnNodeDegrade` checks that MCN conditions are properly reported when a node degrades
+func ValidateMCNConditionOnNodeDegrade(oc *exutil.CLI, fixture string) {
+	// Create client set for test
+	clientSet, clientErr := machineconfigclient.NewForConfig(oc.KubeFramework().ClientConfig())
+	o.Expect(clientErr).NotTo(o.HaveOccurred())
+
+	// Apply invalid MC targeting worker pool
+	mcErr := oc.Run("apply").Args("-f", fixture).Execute()
+	o.Expect(mcErr).NotTo(o.HaveOccurred())
+
+	// Cleanup MC and fix node degradation on failure or test completion
+	defer func() {
+		// Delete the applied MC
+		deleteMCErr := oc.Run("delete").Args("machineconfig", "91-worker-testfile-invalid").Execute()
+		o.Expect(deleteMCErr).NotTo(o.HaveOccurred())
+
+		// Recover the degraded MCP
+		recoverErr := recoverFromDegraded(oc, worker)
+		o.Expect(recoverErr).NotTo(o.HaveOccurred())
+	}()
+
+	// Wait for worker MCP to be in a degraded state with one degraded machine
+	// TODO: consolidate into a helper func that doesn't require getting the MCP more than once
+	o.Expect(waitForMCPConditionStatus(oc, worker, "Degraded", "True")).NotTo(o.HaveOccurred(), "Error waiting for %v MCP to be in a degraded state.", worker)
+	workerMcp, err := clientSet.MachineconfigurationV1().MachineConfigPools().Get(context.TODO(), worker, metav1.GetOptions{})
+	o.Expect(err).NotTo(o.HaveOccurred(), "Error getting %v MCP.", worker)
+	o.Expect(workerMcp.Status.DegradedMachineCount).To(o.BeNumerically("==", 1), "Degraded machine count is not 1.")
+
+	// Get degraded worker node
+	degradedNode, degradedNodeErr := GetDegradedNode(oc, worker)
+	o.Expect(degradedNodeErr).NotTo(o.HaveOccurred(), "Error getting degraded node for %v MCP.", worker)
+
+	// Validate MCN of degraded node
+	degradedNodeMCN, degradedErr := clientSet.MachineconfigurationV1alpha1().MachineConfigNodes().Get(context.TODO(), degradedNode.Name, metav1.GetOptions{})
+	o.Expect(degradedErr).NotTo(o.HaveOccurred())
+	o.Expect(checkMCNConditionStatus(degradedNodeMCN, "AppliedFilesAndOS", "Unknown")).Should(o.BeTrue())
+	o.Expect(checkMCNConditionStatus(degradedNodeMCN, "UpdateExecuted", "Unknown")).Should(o.BeTrue())
+}
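
The new test still passes condition types and statuses as raw strings ("Updated", "Drained", "AppliedFilesAndOS", "UpdateExecuted", "True"/"False"/"Unknown"), which is what the "TODO: make consts for the statuses" above refers to. A minimal sketch of one way to address it; the constant names are hypothetical and not part of this commit, while the status values could simply reuse metav1.ConditionTrue, metav1.ConditionFalse, and metav1.ConditionUnknown from k8s.io/apimachinery/pkg/apis/meta/v1:

	// Hypothetical condition-type constants for the MCN checks in these tests;
	// names are illustrative only.
	const (
		mcnConditionUpdated           = "Updated"
		mcnConditionDrained           = "Drained"
		mcnConditionAppliedFilesAndOS = "AppliedFilesAndOS"
		mcnConditionUpdateExecuted    = "UpdateExecuted"
	)

	// Example usage with the existing helper:
	// err := waitForMCNConditionStatus(clientSet, workerNode.Name, mcnConditionUpdated, metav1.ConditionFalse, 1*time.Minute, 5*time.Second)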

test/extended/testdata/bindata.go

+41 -2

(Generated file; diff not rendered.)

test/extended/testdata/machine_config/machineconfig/0-worker-mc.yaml

+1 -1

@@ -3,7 +3,7 @@ kind: MachineConfig
 metadata:
   labels:
     machineconfiguration.openshift.io/role: worker
-  name: 99-worker-testfile
+  name: 90-worker-testfile
 spec:
   config:
     ignition:
test/extended/testdata/machine_config/machineconfig/1-worker-invalid-mc.yaml

+19

@@ -0,0 +1,19 @@
+apiVersion: machineconfiguration.openshift.io/v1
+kind: MachineConfig
+metadata:
+  labels:
+    machineconfiguration.openshift.io/role: worker
+  name: 91-worker-testfile-invalid
+spec:
+  config:
+    ignition:
+      version: 3.2.0
+    storage:
+      files:
+      - contents:
+          source: data:text/plain;charset=utf-8;base64,dGVzdA==
+        mode: 420
+        path: /etc/wronguser-test-file.test
+        user:
+          name: wronguser
+
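
This fixture is what drives the node into a degraded state: the file is owned by wronguser, which presumably does not exist on the host, so the machine-config daemon is expected to fail while applying the file and report the node as degraded (matching the "AppliedFilesAndOS"/"UpdateExecuted" = "Unknown" checks in the new test). For reference, the inline data URL carries a trivial payload; a minimal, self-contained sketch (not part of this commit) that decodes it:

	package main

	import (
		"encoding/base64"
		"fmt"
	)

	func main() {
		// Payload from: data:text/plain;charset=utf-8;base64,dGVzdA==
		decoded, err := base64.StdEncoding.DecodeString("dGVzdA==")
		if err != nil {
			panic(err)
		}
		fmt.Println(string(decoded)) // prints "test"
	}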

test/extended/util/annotate/generated/zz_generated.annotations.go

+2

(Generated file; diff not rendered.)

zz_generated.manifests/test-reporting.yaml

+2

@@ -102,6 +102,8 @@ spec:
   tests:
   - testName: '[sig-mco][OCPFeatureGate:MachineConfigNode][Serial] Should have MCN
     properties matching associated node properties [apigroup:machineconfiguration.openshift.io]'
+  - testName: '[sig-mco][OCPFeatureGate:MachineConfigNode][Serial] Should properly
+    report MCN conditions on node degrade [apigroup:machineconfiguration.openshift.io]'
   - testName: '[sig-mco][OCPFeatureGate:MachineConfigNode][Serial] Should properly
     transition through MCN conditions on node update [apigroup:machineconfiguration.openshift.io]'
 - featureGate: ManagedBootImages
