Skip to content

Commit a67ba70

Browse files
committed
reconciler: only build mosb's currently targeted by the MCP
osbuildcontroller_test: Unit testing for cascading failure In OCPBUGS-45496, here are the steps which lead to the failure of the creation of an MOSB upon the addition of a new MC: 1) An MC with erroneous contents not caught by the API validations creates a rendered-MC which triggers a MOSB build and fails as expected with an error 2) This erroneous MOSB keeps getting added to the rate limited worker queue till it hits the max retries. Then it is forgotten from the queue and is subjected to a backoff time to get added back again 3) In the meantime if the erroneous MC is deleted and a new valid MC is added targetting the same MCP, a valid MOSB build starts 4) When the erroneous mosb enters the queue again and sees that there already exists another not successfull MOSB in build, it cancels all other builds. Hence the valid MOSB is cancelled and the erroneous MOSB is re-triggerred again. Since it will never be able to start the build and fail again the steps 1 and 2 keep happening. Any new MC will fail to successfully create a MOSB and trigger a build. Thus the solution is to check upon the MOSB sync whether the MCP and rendered-MC that the MOSB targets even exists anymore. If not we don't need to build it.
1 parent 6778953 commit a67ba70

File tree

2 files changed

+97
-13
lines changed

2 files changed

+97
-13
lines changed

pkg/controller/build/osbuildcontroller_test.go

Lines changed: 84 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ func TestOSBuildControllerDoesNothing(t *testing.T) {
4141
ctx, cancel := context.WithTimeout(context.Background(), time.Second*5)
4242
t.Cleanup(cancel)
4343

44-
_, mcfgclient, _, _ := setupOSBuildControllerForTest(ctx, t)
44+
_, mcfgclient, _, _, _ := setupOSBuildControllerForTest(ctx, t)
4545

4646
// i needs to be set to 2 because rendered-worker-1 already exists.
4747
for i := 2; i <= 10; i++ {
@@ -67,7 +67,7 @@ func TestOSBuildControllerDeletesRunningBuildBeforeStartingANewOne(t *testing.T)
6767
t.Run("MachineOSConfig change", func(t *testing.T) {
6868
t.Parallel()
6969

70-
kubeclient, mcfgclient, mosc, initialMosb, mcp, kubeassert := setupOSBuildControllerForTestWithRunningBuild(ctx, t, poolName)
70+
kubeclient, mcfgclient, mosc, initialMosb, mcp, kubeassert, _ := setupOSBuildControllerForTestWithRunningBuild(ctx, t, poolName)
7171

7272
// Now that the build is in the running state, we update the MachineOSConfig.
7373
apiMosc := testhelpers.SetContainerfileContentsOnMachineOSConfig(ctx, t, mcfgclient, mosc, "FROM configs AS final\nRUN echo 'helloworld' > /etc/helloworld")
@@ -99,7 +99,7 @@ func TestOSBuildControllerDeletesRunningBuildBeforeStartingANewOne(t *testing.T)
9999
t.Run("MachineConfig change", func(t *testing.T) {
100100
t.Parallel()
101101

102-
kubeclient, mcfgclient, mosc, initialMosb, mcp, kubeassert := setupOSBuildControllerForTestWithRunningBuild(ctx, t, poolName)
102+
kubeclient, mcfgclient, mosc, initialMosb, mcp, kubeassert, _ := setupOSBuildControllerForTestWithRunningBuild(ctx, t, poolName)
103103

104104
apiMCP := insertNewRenderedMachineConfigAndUpdatePool(ctx, t, mcfgclient, mosc.Spec.MachineConfigPool.Name, "rendered-worker-2")
105105

@@ -461,6 +461,77 @@ func TestOSBuildControllerRebuildAnnotation(t *testing.T) {
461461

462462
}
463463

464+
func TestOSBuildControllerBuildFailedDoesNotCascade(t *testing.T) {
465+
ctx, cancel := context.WithTimeout(context.Background(), time.Second*5)
466+
t.Cleanup(cancel)
467+
468+
poolName := "worker"
469+
faultyMC := "rendered-undesiredFaultyMC"
470+
471+
// Create a MOSC to enable OCL and let it produce a new MOSB in Running State
472+
_, mcfgclient, mosc, mosb, mcp, _, ctrl := setupOSBuildControllerForTestWithRunningBuild(ctx, t, poolName)
473+
assertMachineOSConfigGetsCurrentBuildAnnotation(ctx, t, mcfgclient, mosc, mosb)
474+
475+
found := func(item *mcfgv1alpha1.MachineOSBuild, list []mcfgv1alpha1.MachineOSBuild) bool {
476+
for _, m := range list {
477+
if m.Name == item.Name {
478+
return true
479+
}
480+
}
481+
return false
482+
}
483+
484+
mosbList, err := mcfgclient.MachineconfigurationV1alpha1().MachineOSBuilds().List(ctx, metav1.ListOptions{})
485+
require.NoError(t, err)
486+
if !found(mosb, mosbList.Items) {
487+
t.Errorf("Expected %v to be in the list %v", mosb.Name, mosbList.Items)
488+
}
489+
490+
// This faultyMC represents an older Machine config that passed through API validation checks but if a MOSB (name oldMOSB) were to be built, it would fail to start a job. Hence over here a MC is added but the MCP is not targetting this MCP.
491+
insertNewRenderedMachineConfig(ctx, t, mcfgclient, poolName, faultyMC)
492+
now := metav1.Now()
493+
oldMosb := &mcfgv1alpha1.MachineOSBuild{
494+
TypeMeta: metav1.TypeMeta{
495+
Kind: "MachineOSBuild",
496+
APIVersion: "machineconfiguration.openshift.io/v1alpha1",
497+
},
498+
ObjectMeta: metav1.ObjectMeta{
499+
Name: "undesiredAndForgottenMOSB",
500+
Labels: map[string]string{
501+
constants.TargetMachineConfigPoolLabelKey: mcp.Name,
502+
constants.RenderedMachineConfigLabelKey: faultyMC,
503+
constants.MachineOSConfigNameLabelKey: mosc.Name,
504+
},
505+
},
506+
Spec: mcfgv1alpha1.MachineOSBuildSpec{
507+
RenderedImagePushspec: "randRef",
508+
Version: 1,
509+
ConfigGeneration: 1,
510+
DesiredConfig: mcfgv1alpha1.RenderedMachineConfigReference{
511+
Name: faultyMC,
512+
},
513+
MachineOSConfig: mcfgv1alpha1.MachineOSConfigReference{
514+
Name: mosc.Name,
515+
},
516+
},
517+
Status: mcfgv1alpha1.MachineOSBuildStatus{
518+
BuildStart: &now,
519+
},
520+
}
521+
522+
// Enqueue another old and un-targeted MOSB to the osbuildcontroller
523+
_, err = mcfgclient.MachineconfigurationV1alpha1().MachineOSBuilds().Create(ctx, oldMosb, metav1.CreateOptions{})
524+
require.NoError(t, err)
525+
ctrl.buildReconciler.AddMachineOSBuild(ctx, oldMosb)
526+
527+
// Assert that the original MOSB which is derived from the current rendered MC that the MCP targets is still building and untouched
528+
mosbList, err = mcfgclient.MachineconfigurationV1alpha1().MachineOSBuilds().List(ctx, metav1.ListOptions{})
529+
require.NoError(t, err)
530+
if !found(mosb, mosbList.Items) {
531+
t.Errorf("Expected %v to be in the list %v", mosb.Name, mosbList.Items)
532+
}
533+
}
534+
464535
func assertBuildObjectsAreCreated(ctx context.Context, t *testing.T, kubeassert *testhelpers.Assertions, mosb *mcfgv1alpha1.MachineOSBuild) {
465536
t.Helper()
466537

@@ -481,7 +552,7 @@ func assertBuildObjectsAreDeleted(ctx context.Context, t *testing.T, kubeassert
481552
kubeassert.SecretDoesNotExist(utils.GetFinalPushSecretName(mosb))
482553
}
483554

484-
func setupOSBuildControllerForTest(ctx context.Context, t *testing.T) (*fakecorev1client.Clientset, *fakeclientmachineconfigv1.Clientset, *testhelpers.Assertions, *fixtures.ObjectsForTest) {
555+
func setupOSBuildControllerForTest(ctx context.Context, t *testing.T) (*fakecorev1client.Clientset, *fakeclientmachineconfigv1.Clientset, *testhelpers.Assertions, *fixtures.ObjectsForTest, *OSBuildController) {
485556
kubeclient, mcfgclient, lobj, kubeassert := fixtures.GetClientsForTest(t)
486557

487558
cfg := Config{
@@ -498,11 +569,11 @@ func setupOSBuildControllerForTest(ctx context.Context, t *testing.T) (*fakecore
498569

499570
kubeassert = kubeassert.Eventually().WithContext(ctx).WithPollInterval(time.Millisecond)
500571

501-
return kubeclient, mcfgclient, kubeassert, lobj
572+
return kubeclient, mcfgclient, kubeassert, lobj, ctrl
502573
}
503574

504-
func setupOSBuildControllerForTestWithBuild(ctx context.Context, t *testing.T, poolName string) (*fakecorev1client.Clientset, *fakeclientmachineconfigv1.Clientset, *mcfgv1alpha1.MachineOSConfig, *mcfgv1alpha1.MachineOSBuild, *mcfgv1.MachineConfigPool, *testhelpers.Assertions) {
505-
kubeclient, mcfgclient, kubeassert, lobj := setupOSBuildControllerForTest(ctx, t)
575+
func setupOSBuildControllerForTestWithBuild(ctx context.Context, t *testing.T, poolName string) (*fakecorev1client.Clientset, *fakeclientmachineconfigv1.Clientset, *mcfgv1alpha1.MachineOSConfig, *mcfgv1alpha1.MachineOSBuild, *mcfgv1.MachineConfigPool, *testhelpers.Assertions, *OSBuildController) {
576+
kubeclient, mcfgclient, kubeassert, lobj, ctrl := setupOSBuildControllerForTest(ctx, t)
506577

507578
mcp := lobj.MachineConfigPool
508579
mosc := lobj.MachineOSConfig
@@ -513,13 +584,13 @@ func setupOSBuildControllerForTestWithBuild(ctx context.Context, t *testing.T, p
513584

514585
mosb := buildrequest.NewMachineOSBuildFromAPIOrDie(ctx, kubeclient, mosc, mcp)
515586

516-
return kubeclient, mcfgclient, mosc, mosb, mcp, kubeassert.WithPollInterval(time.Millisecond * 10).WithContext(ctx).Eventually()
587+
return kubeclient, mcfgclient, mosc, mosb, mcp, kubeassert.WithPollInterval(time.Millisecond * 10).WithContext(ctx).Eventually(), ctrl
517588
}
518589

519-
func setupOSBuildControllerForTestWithRunningBuild(ctx context.Context, t *testing.T, poolName string) (*fakecorev1client.Clientset, *fakeclientmachineconfigv1.Clientset, *mcfgv1alpha1.MachineOSConfig, *mcfgv1alpha1.MachineOSBuild, *mcfgv1.MachineConfigPool, *testhelpers.Assertions) {
590+
func setupOSBuildControllerForTestWithRunningBuild(ctx context.Context, t *testing.T, poolName string) (*fakecorev1client.Clientset, *fakeclientmachineconfigv1.Clientset, *mcfgv1alpha1.MachineOSConfig, *mcfgv1alpha1.MachineOSBuild, *mcfgv1.MachineConfigPool, *testhelpers.Assertions, *OSBuildController) {
520591
t.Helper()
521592

522-
kubeclient, mcfgclient, mosc, mosb, mcp, kubeassert := setupOSBuildControllerForTestWithBuild(ctx, t, poolName)
593+
kubeclient, mcfgclient, mosc, mosb, mcp, kubeassert, ctrl := setupOSBuildControllerForTestWithBuild(ctx, t, poolName)
523594

524595
initialBuildJobName := utils.GetBuildJobName(mosb)
525596

@@ -535,13 +606,13 @@ func setupOSBuildControllerForTestWithRunningBuild(ctx context.Context, t *testi
535606
// The MachineOSBuild should be running.
536607
kubeassert.Eventually().WithContext(ctx).MachineOSBuildIsRunning(mosb, "Expected the MachineOSBuild %s status to be running", mosb.Name)
537608

538-
return kubeclient, mcfgclient, mosc, mosb, mcp, kubeassert
609+
return kubeclient, mcfgclient, mosc, mosb, mcp, kubeassert, ctrl
539610
}
540611

541612
func setupOSBuildControllerForTestWithSuccessfulBuild(ctx context.Context, t *testing.T, poolName string) (*fakecorev1client.Clientset, *fakeclientmachineconfigv1.Clientset, *mcfgv1alpha1.MachineOSConfig, *mcfgv1alpha1.MachineOSBuild, *mcfgv1.MachineConfigPool, *testhelpers.Assertions) {
542613
t.Helper()
543614

544-
kubeclient, mcfgclient, mosc, mosb, mcp, kubeassert := setupOSBuildControllerForTestWithRunningBuild(ctx, t, poolName)
615+
kubeclient, mcfgclient, mosc, mosb, mcp, kubeassert, _ := setupOSBuildControllerForTestWithRunningBuild(ctx, t, poolName)
545616

546617
kubeassert.MachineOSBuildExists(mosb)
547618
kubeassert.JobExists(utils.GetBuildJobName(mosb))
@@ -555,7 +626,7 @@ func setupOSBuildControllerForTestWithSuccessfulBuild(ctx context.Context, t *te
555626
func setupOSBuildControllerForTestWithFailedBuild(ctx context.Context, t *testing.T, poolName string) (*fakecorev1client.Clientset, *fakeclientmachineconfigv1.Clientset, *mcfgv1alpha1.MachineOSConfig, *mcfgv1alpha1.MachineOSBuild, *mcfgv1.MachineConfigPool, *testhelpers.Assertions) {
556627
t.Helper()
557628

558-
kubeclient, mcfgclient, mosc, mosb, mcp, kubeassert := setupOSBuildControllerForTestWithBuild(ctx, t, poolName)
629+
kubeclient, mcfgclient, mosc, mosb, mcp, kubeassert, _ := setupOSBuildControllerForTestWithBuild(ctx, t, poolName)
559630

560631
initialBuildJobName := utils.GetBuildJobName(mosb)
561632

pkg/controller/build/reconciler.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -868,6 +868,19 @@ func (b *buildReconciler) syncMachineOSBuilds(ctx context.Context) error {
868868
// builder associated with it that one should be created.
869869
func (b *buildReconciler) syncMachineOSBuild(ctx context.Context, mosb *mcfgv1alpha1.MachineOSBuild) error {
870870
return b.timeObjectOperation(mosb, syncingVerb, func() error {
871+
872+
// It could be the case that the MCP the mosb in queue was targeting no longer is valid
873+
mcp, err := b.machineConfigPoolLister.Get(mosb.ObjectMeta.Labels[constants.TargetMachineConfigPoolLabelKey])
874+
if err != nil {
875+
return fmt.Errorf("could not get MachineConfigPool from MachineOSBuild %q: %w", mosb.Name, err)
876+
}
877+
878+
// An mosb which had previously been forgotten by the queue and is no longer desired by the mcp should not build
879+
if mosb.ObjectMeta.Labels[constants.RenderedMachineConfigLabelKey] != mcp.Spec.Configuration.Name {
880+
klog.Infof("The MachineOSBuild %q which builds the rendered Machine Config %q is no longer desired by the MCP %q", mosb.Name, mosb.ObjectMeta.Labels[constants.RenderedMachineConfigLabelKey], mosb.ObjectMeta.Labels[constants.TargetMachineConfigPoolLabelKey])
881+
return nil
882+
}
883+
871884
mosbState := ctrlcommon.NewMachineOSBuildState(mosb)
872885

873886
if mosbState.IsInTerminalState() {

0 commit comments

Comments
 (0)