@@ -48,6 +48,8 @@ import (
48
48
clientretry "k8s.io/client-go/util/retry"
49
49
"k8s.io/client-go/util/workqueue"
50
50
"k8s.io/klog/v2"
51
+
52
+ buildconstants "github.com/openshift/machine-config-operator/pkg/controller/build/constants"
51
53
)
52
54
53
55
const (
@@ -888,8 +890,28 @@ func (ctrl *Controller) handleErr(err error, key string) {
888
890
// 2. If a MachineConfig changes, we should wait for the OS image build to be
889
891
// ready so we can update both the nodes' desired MachineConfig and desired
890
892
// image annotations simultaneously.
893
+ func (ctrl * Controller ) getConfigAndBuildAndLayeredStatus (pool * mcfgv1.MachineConfigPool ) (* mcfgv1.MachineOSConfig , * mcfgv1.MachineOSBuild , bool , error ) {
894
+ mosc , mosb , err := ctrl .getConfigAndBuild (pool )
895
+ // If we attempt to list resources which are not present either because none
896
+ // exist or they're behind an inactive feature gate, they will return an
897
+ // IsNotFound error. Any other errors should be returned to the caller.
898
+ if err != nil && ! errors .IsNotFound (err ) {
899
+ return nil , nil , false , err
900
+ }
901
+
902
+ isLayered , err := ctrl .isLayeredPool (mosc , mosb )
903
+ if err != nil {
904
+ return nil , nil , false , fmt .Errorf ("Failed to determine whether pool %s opts in to OCL due to an error: %s" , pool .Name , err )
905
+ }
891
906
892
- func (ctrl * Controller ) GetConfigAndBuild (pool * mcfgv1.MachineConfigPool ) (* mcfgv1.MachineOSConfig , * mcfgv1.MachineOSBuild , error ) {
907
+ return mosc , mosb , isLayered , nil
908
+ }
909
+
910
+ func (ctrl * Controller ) getConfigAndBuild (pool * mcfgv1.MachineConfigPool ) (* mcfgv1.MachineOSConfig , * mcfgv1.MachineOSBuild , error ) {
911
+ // TODO: We should use the selectors from the build controller since they are
912
+ // well-tested and makes querying for this information significantly easier.
913
+ // Additionally, this should use listers instead of API clients in order to
914
+ // reduce the impact on the API server.
893
915
var ourConfig * mcfgv1.MachineOSConfig
894
916
var ourBuild * mcfgv1.MachineOSBuild
895
917
configList , err := ctrl .client .MachineconfigurationV1 ().MachineOSConfigs ().List (context .TODO (), metav1.ListOptions {})
@@ -898,6 +920,7 @@ func (ctrl *Controller) GetConfigAndBuild(pool *mcfgv1.MachineConfigPool) (*mcfg
898
920
}
899
921
900
922
for _ , config := range configList .Items {
923
+ config := config
901
924
if config .Spec .MachineConfigPool .Name == pool .Name {
902
925
ourConfig = & config
903
926
break
@@ -914,24 +937,27 @@ func (ctrl *Controller) GetConfigAndBuild(pool *mcfgv1.MachineConfigPool) (*mcfg
914
937
}
915
938
916
939
for _ , build := range buildList .Items {
917
- if build .Spec .MachineOSConfig .Name == ourConfig .Name {
918
- if build .Spec .MachineConfig .Name == pool .Spec .Configuration .Name {
919
- ourBuild = & build
920
- break
921
- }
940
+ build := build
941
+ if build .Spec .MachineOSConfig .Name == ourConfig .Name && build .Spec .MachineConfig .Name == pool .Spec .Configuration .Name {
942
+ ourBuild = & build
943
+ break
922
944
}
923
945
}
924
946
925
947
return ourConfig , ourBuild , nil
926
-
927
948
}
928
949
929
- func (ctrl * Controller ) canLayeredPoolContinue (pool * mcfgv1.MachineConfigPool ) (string , bool , error ) {
930
-
931
- mosc , mosb , _ := ctrl .GetConfigAndBuild (pool )
950
+ func (ctrl * Controller ) canLayeredPoolContinue (pool * mcfgv1.MachineConfigPool , mosc * mcfgv1.MachineOSConfig , mosb * mcfgv1.MachineOSBuild ) (string , bool , error ) {
951
+ // This is an edgecase which we should ideally never hit. However, it is
952
+ // better to anticipate it and have an error message ready vs. the
953
+ // alternative.
954
+ if mosc == nil && mosb != nil {
955
+ msg := fmt .Sprintf ("orphaned MachineOSBuild %q found, but MachineOSConfig %q not found" , mosb .Name , mosb .Labels [buildconstants .MachineOSConfigNameLabelKey ])
956
+ return msg , false , fmt .Errorf (msg )
957
+ }
932
958
933
- if mosc == nil || mosb == nil {
934
- return "No MachineOSConfig or Build for this pool" , false , nil
959
+ if ! ctrl . isConfigAndBuildPresent ( mosc , mosb ) {
960
+ return "No MachineOSConfig or MachineOSBuild for this pool" , false , nil
935
961
}
936
962
937
963
cs := ctrlcommon .NewMachineOSConfigState (mosc )
@@ -941,15 +967,18 @@ func (ctrl *Controller) canLayeredPoolContinue(pool *mcfgv1.MachineConfigPool) (
941
967
pullspec := cs .GetOSImage ()
942
968
943
969
if ! hasImage {
944
- return "Desired Image not set in MachineOSBuild " , false , nil
970
+ return "Desired image not set in MachineOSConfig " , false , nil
945
971
}
946
972
947
973
switch {
948
- // If the build is successful and we have the image pullspec, we can proceed
974
+ // If the build is successful and the MachineOSConfig has the matching pullspec, we can proceed
949
975
// with rolling out the new OS image.
950
- case bs .IsBuildSuccess () && hasImage :
976
+ case bs .IsBuildSuccess () && hasImage && cs . MachineOSBuildIsCurrent ( mosb ) :
951
977
msg := fmt .Sprintf ("Image built successfully, pullspec: %s" , pullspec )
952
978
return msg , true , nil
979
+ case bs .IsBuildSuccess () && hasImage && ! cs .MachineOSBuildIsCurrent (mosb ):
980
+ msg := fmt .Sprintf ("Image built successfully, pullspec: %s, but MachineOSConfig %q has not updated yet" , pullspec , mosc .Name )
981
+ return msg , false , nil
953
982
case bs .IsBuildPending ():
954
983
return "Image build pending" , false , nil
955
984
case bs .IsBuilding ():
@@ -1015,14 +1044,13 @@ func (ctrl *Controller) syncMachineConfigPool(key string) error {
1015
1044
return ctrl .syncStatusOnly (pool )
1016
1045
}
1017
1046
1018
- mosc , mosb , _ := ctrl .GetConfigAndBuild (pool )
1019
- layered , err := ctrl .IsLayeredPool (mosc , mosb )
1047
+ mosc , mosb , layered , err := ctrl .getConfigAndBuildAndLayeredStatus (pool )
1020
1048
if err != nil {
1021
- return fmt .Errorf ("Failed to determine whether pool %s opts in to OCL due to an error : %s" , pool . Name , err )
1049
+ return fmt .Errorf ("could not get config and build : %w" , err )
1022
1050
}
1023
1051
1024
1052
if layered {
1025
- reason , canApplyUpdates , err := ctrl .canLayeredPoolContinue (pool )
1053
+ reason , canApplyUpdates , err := ctrl .canLayeredPoolContinue (pool , mosc , mosb )
1026
1054
if err != nil {
1027
1055
klog .Infof ("Layered pool %s encountered an error: %s" , pool .Name , err )
1028
1056
return err
@@ -1095,7 +1123,7 @@ func (ctrl *Controller) syncMachineConfigPool(key string) error {
1095
1123
}
1096
1124
}
1097
1125
ctrl .logPool (pool , "%d candidate nodes in %d zones for update, capacity: %d" , len (candidates ), len (zones ), capacity )
1098
- if err := ctrl .updateCandidateMachines (pool , candidates , capacity ); err != nil {
1126
+ if err := ctrl .updateCandidateMachines (layered , mosc , mosb , pool , candidates , capacity ); err != nil {
1099
1127
if syncErr := ctrl .syncStatusOnly (pool ); syncErr != nil {
1100
1128
errs := kubeErrs .NewAggregate ([]error {syncErr , err })
1101
1129
return fmt .Errorf ("error setting annotations for pool %q, sync error: %w" , pool .Name , errs )
@@ -1184,7 +1212,7 @@ func (ctrl *Controller) updateCandidateNode(mosc *mcfgv1.MachineOSConfig, mosb *
1184
1212
}
1185
1213
1186
1214
lns := ctrlcommon .NewLayeredNodeState (oldNode )
1187
- layered , err := ctrl .IsLayeredPool (mosc , mosb )
1215
+ layered , err := ctrl .isLayeredPool (mosc , mosb )
1188
1216
if err != nil {
1189
1217
return fmt .Errorf ("Failed to determine whether pool %s opts in to OCL due to an error: %s" , pool .Name , err )
1190
1218
}
@@ -1330,9 +1358,9 @@ func (ctrl *Controller) filterControlPlaneCandidateNodes(pool *mcfgv1.MachineCon
1330
1358
}
1331
1359
1332
1360
// SetDesiredStateFromPool in old mco explains how this works. Somehow you need to NOT FAIL if the mosb doesn't exist. So
1333
- // we still need to base this whole things on pools but IsLayeredPool == does mosb exist
1361
+ // we still need to base this whole thing on pools but isLayeredPool == does mosb exist
1334
1362
// updateCandidateMachines sets the desiredConfig annotation the candidate machines
1335
- func (ctrl * Controller ) updateCandidateMachines (pool * mcfgv1.MachineConfigPool , candidates []* corev1.Node , capacity uint ) error {
1363
+ func (ctrl * Controller ) updateCandidateMachines (layered bool , mosc * mcfgv1. MachineOSConfig , mosb * mcfgv1. MachineOSBuild , pool * mcfgv1.MachineConfigPool , candidates []* corev1.Node , capacity uint ) error {
1336
1364
if pool .Name == ctrlcommon .MachineConfigPoolMaster {
1337
1365
var err error
1338
1366
candidates , capacity , err = ctrl .filterControlPlaneCandidateNodes (pool , candidates , capacity )
@@ -1351,25 +1379,20 @@ func (ctrl *Controller) updateCandidateMachines(pool *mcfgv1.MachineConfigPool,
1351
1379
candidates = candidates [:capacity ]
1352
1380
}
1353
1381
1354
- return ctrl .setDesiredAnnotations (pool , candidates )
1382
+ return ctrl .setDesiredAnnotations (layered , mosc , mosb , pool , candidates )
1355
1383
}
1356
1384
1357
- func (ctrl * Controller ) setDesiredAnnotations (pool * mcfgv1.MachineConfigPool , candidates []* corev1.Node ) error {
1385
+ func (ctrl * Controller ) setDesiredAnnotations (layered bool , mosc * mcfgv1. MachineOSConfig , mosb * mcfgv1. MachineOSBuild , pool * mcfgv1.MachineConfigPool , candidates []* corev1.Node ) error {
1358
1386
eventName := "SetDesiredConfig"
1359
1387
updateName := fmt .Sprintf ("MachineConfig: %s" , pool .Spec .Configuration .Name )
1360
- config , build , _ := ctrl .GetConfigAndBuild (pool )
1361
- layered , err := ctrl .IsLayeredPool (config , build )
1362
1388
1363
- if err != nil {
1364
- return fmt .Errorf ("Failed to determine whether pool %s opts in to OCL due to an error: %s" , pool .Name , err )
1365
- }
1366
1389
if layered {
1367
1390
eventName = "SetDesiredConfigAndOSImage"
1368
- updateName = fmt .Sprintf ("%s / Image: %s" , updateName , ctrlcommon .NewMachineOSConfigState (config ).GetOSImage ())
1391
+ updateName = fmt .Sprintf ("%s / Image: %s" , updateName , ctrlcommon .NewMachineOSConfigState (mosc ).GetOSImage ())
1369
1392
klog .Infof ("Continuing to sync layered MachineConfigPool %s" , pool .Name )
1370
1393
}
1371
1394
for _ , node := range candidates {
1372
- if err := ctrl .updateCandidateNode (config , build , node .Name , pool ); err != nil {
1395
+ if err := ctrl .updateCandidateNode (mosc , mosb , node .Name , pool ); err != nil {
1373
1396
return fmt .Errorf ("setting desired %s for node %s: %w" , pool .Spec .Configuration .Name , node .Name , err )
1374
1397
}
1375
1398
}
@@ -1507,10 +1530,18 @@ func getErrorString(err error) string {
1507
1530
return ""
1508
1531
}
1509
1532
1510
- func (ctrl * Controller ) IsLayeredPool (mosc * mcfgv1.MachineOSConfig , mosb * mcfgv1.MachineOSBuild ) (bool , error ) {
1533
+ func (ctrl * Controller ) isLayeredPool (mosc * mcfgv1.MachineOSConfig , mosb * mcfgv1.MachineOSBuild ) (bool , error ) {
1511
1534
fg , err := ctrl .fgAcessor .CurrentFeatureGates ()
1512
1535
if err != nil {
1513
1536
return false , err
1514
1537
}
1515
- return (mosc != nil || mosb != nil ) && fg .Enabled (features .FeatureGateOnClusterBuild ), nil
1538
+ return ctrl .isConfigOrBuildPresent (mosc , mosb ) && fg .Enabled (features .FeatureGateOnClusterBuild ), nil
1539
+ }
1540
+
1541
+ func (ctrl * Controller ) isConfigOrBuildPresent (mosc * mcfgv1.MachineOSConfig , mosb * mcfgv1.MachineOSBuild ) bool {
1542
+ return (mosc != nil || mosb != nil )
1543
+ }
1544
+
1545
+ func (ctrl * Controller ) isConfigAndBuildPresent (mosc * mcfgv1.MachineOSConfig , mosb * mcfgv1.MachineOSBuild ) bool {
1546
+ return (mosc != nil && mosb != nil )
1516
1547
}
0 commit comments