@@ -3,6 +3,7 @@ package machine_config
3
3
import (
4
4
"context"
5
5
"encoding/json"
6
+ "errors"
6
7
"fmt"
7
8
"math/rand"
8
9
"time"
@@ -39,6 +40,7 @@ const (
39
40
)
40
41
41
42
// TODO: add error message returns for `.NotTo(o.HaveOccurred())` cases.
43
+ // TODO: fix capitalization of helper funcs
42
44
43
45
// skipUnlessTargetPlatform skips the test if it is running on the target platform
44
46
func skipUnlessTargetPlatform (oc * exutil.CLI , platformType osconfigv1.PlatformType ) {
@@ -401,7 +403,7 @@ func WaitForMCPToBeReady(oc *exutil.CLI, machineConfigClient *machineconfigclien
401
403
return nil
402
404
}
403
405
404
- // GetCordonedNodes get cordoned nodes (if maxUnavailable > 1 ) otherwise return the 1st cordoned node
406
+ // `GetCordonedNodes` gets the cordoned nodes (if maxUnavailable > 1), otherwise returns the 1st cordoned node
405
407
func GetCordonedNodes (oc * exutil.CLI , mcpName string ) []corev1.Node {
406
408
// Wait for the MCP to start updating
407
409
o .Expect (waitForMCPConditionStatus (oc , mcpName , "Updating" , "True" )).NotTo (o .HaveOccurred (), "Waiting for 'Updating' status change failed." )
@@ -413,6 +415,7 @@ func GetCordonedNodes(oc *exutil.CLI, mcpName string) []corev1.Node {
413
415
o .Expect (nodeErr ).NotTo (o .HaveOccurred (), "Error getting nodes from %v MCP." , mcpName )
414
416
o .Expect (nodes ).ShouldNot (o .BeEmpty (), "No nodes found for %v MCP." , mcpName )
415
417
418
+ // TODO: cleanup
416
419
for _ , node := range nodes {
417
420
unschedulable := node .Spec .Unschedulable
418
421
if unschedulable {
@@ -451,27 +454,30 @@ func waitForMCPConditionStatus(oc *exutil.CLI, mcpName string, conditionType mcf
451
454
452
455
framework .Logf ("Waiting for %v MCP's %v condition to be %v." , mcp .Name , conditionType , status )
453
456
return false
454
- }, 5 * time .Minute , 3 * time .Second ).Should (o .BeTrue ())
457
+ }, 2 * time .Minute , 3 * time .Second ).Should (o .BeTrue ())
455
458
return nil
456
459
}
457
460
458
461
// `waitForMCNConditionStatus` waits until the desired MCN condition matches the desired status (ex. wait until "Updated" is "False")
459
462
func waitForMCNConditionStatus (clientSet * machineconfigclient.Clientset , mcnName string , conditionType string , status metav1.ConditionStatus , timeout time.Duration , interval time.Duration ) error {
460
463
o .Eventually (func () bool {
461
- // Get MCN & desried condition status
464
+ framework .Logf ("Waiting for MCN %v %v condition to be %v." , mcnName , conditionType , status )
465
+
466
+ // Get MCN & check if the MCN condition status matches the desired status
462
467
workerNodeMCN , workerErr := clientSet .MachineconfigurationV1alpha1 ().MachineConfigNodes ().Get (context .TODO (), mcnName , metav1.GetOptions {})
463
468
o .Expect (workerErr ).NotTo (o .HaveOccurred ())
464
- conditionStatus := getMCNConditionStatus (workerNodeMCN , conditionType )
465
- if conditionStatus == status {
466
- return true
467
- }
468
-
469
- framework .Logf ("Waiting for MCN %v %v condition to be %v." , mcnName , conditionType , status )
470
- return false
469
+ return checkMCNConditionStatus (workerNodeMCN , conditionType , status )
471
470
}, timeout , interval ).Should (o .BeTrue ())
472
471
return nil
473
472
}
474
473
474
+ // `checkMCNConditionStatus` checks that an MCN condition matches the desired status (ex. confirm "Updated" is "False")
475
+ func checkMCNConditionStatus (mcn * v1alpha1.MachineConfigNode , conditionType string , status metav1.ConditionStatus ) bool {
476
+ conditionStatus := getMCNConditionStatus (mcn , conditionType )
477
+ framework .Logf ("MCN %v %v condition is %v." , mcn .Name , conditionType , conditionStatus )
478
+ return conditionStatus == status
479
+ }
480
+
475
481
// `getMCNConditionStatus` returns the status of the desired condition type for MCN, or an empty string if the condition does not exist
476
482
func getMCNConditionStatus (mcn * v1alpha1.MachineConfigNode , conditionType string ) metav1.ConditionStatus {
477
483
// Loop through conditions and return the status of the desired condition type
@@ -508,3 +514,70 @@ func confirmUpdatedMCNStatus(clientSet *machineconfigclient.Clientset, mcnName s
508
514
framework .Logf ("Node %s update is complete and MCN is valid." , mcnName )
509
515
return true
510
516
}
517
+
518
+ // TODO: consolidate with similar functions
519
+ func GetDegradedNode (oc * exutil.CLI , mcpName string ) (corev1.Node , error ) {
520
+ // Get nodes in desired pool
521
+ nodes , nodeErr := GetNodesByRole (oc , mcpName )
522
+ o .Expect (nodeErr ).NotTo (o .HaveOccurred ())
523
+ o .Expect (nodes ).ShouldNot (o .BeEmpty ())
524
+
525
+ // Get degraded node
526
+ for _ , node := range nodes {
527
+ // TODO: create generalized get node state helper
528
+ state := node .Annotations ["machineconfiguration.openshift.io/state" ]
529
+ if state == "Degraded" {
530
+ return node , nil
531
+ }
532
+ }
533
+
534
+ return corev1.Node {}, errors .New ("no degraded node found" )
535
+ }
536
+
537
+ // `recoverFromDegraded` updates the current and desired machine configs so that the pool can recover from degraded state once the offending MC is deleted
538
+ func recoverFromDegraded (oc * exutil.CLI , mcpName string ) error {
539
+ framework .Logf ("Recovering %s pool from degraded state" , mcpName )
540
+
541
+ // Get nodes from degraded MCP & update the desired config of the degraded node to force a recovery update
542
+ nodes , nodeErr := GetNodesByRole (oc , mcpName )
543
+ o .Expect (nodeErr ).NotTo (o .HaveOccurred ())
544
+ o .Expect (nodes ).ShouldNot (o .BeEmpty ())
545
+ for _ , node := range nodes {
546
+ framework .Logf ("Restoring desired config for node: %s" , node .Name )
547
+ state := node .Annotations ["machineconfiguration.openshift.io/state" ]
548
+ if state == "Done" {
549
+ framework .Logf ("Node %s is updated and does not need to be recovered" , node .Name )
550
+ } else {
551
+ err := restoreDesiredConfig (oc , node )
552
+ if err != nil {
553
+ return fmt .Errorf ("error restoring desired config in node %s. Error: %s" , node .Name , err )
554
+ }
555
+ }
556
+ }
557
+
558
+ // Wait for MCP to not be in degraded status
559
+ mcpErr := waitForMCPConditionStatus (oc , mcpName , "Degraded" , "False" )
560
+ o .Expect (mcpErr ).NotTo (o .HaveOccurred (), fmt .Sprintf ("could not recover %v MCP from the degraded status." , mcpName ))
561
+ mcpErr = waitForMCPConditionStatus (oc , mcpName , "Updated" , "True" )
562
+ o .Expect (mcpErr ).NotTo (o .HaveOccurred (), fmt .Sprintf ("%v MCP could not reach an updated state." , mcpName ))
563
+ return nil
564
+ }
565
+
566
+ // TODO: generalize with get node status to just pass in the general node annotation label
567
+ func getCurrentMachineConfig (node corev1.Node ) string {
568
+ return node .Annotations ["machineconfiguration.openshift.io/currentConfig" ]
569
+ }
570
+
571
+ // `restoreDesiredConfig` updates the value of a node's desiredConfig annotation to be equal to the value of its currentConfig (desiredConfig=currentConfig)
572
+ func restoreDesiredConfig (oc * exutil.CLI , node corev1.Node ) error {
573
+ // Get current config
574
+ currentConfig := getCurrentMachineConfig (node )
575
+ if currentConfig == "" {
576
+ return fmt .Errorf ("currentConfig annotation is empty for node %s" , node .Name )
577
+ }
578
+
579
+ // Update desired config to be equal to current config
580
+ framework .Logf ("Node: %s is restoring desiredConfig value to match currentConfig value: %s" , node .Name , currentConfig )
581
+ configErr := oc .Run ("patch" ).Args (fmt .Sprintf ("node/%v" , node .Name ), "--patch" , fmt .Sprintf (`{"metadata":{"annotations":{"machineconfiguration.openshift.io/desiredConfig":"%v"}}}` , currentConfig ), "--type=merge" ).Execute ()
582
+ return configErr
583
+ }
0 commit comments