Skip to content

Commit f6996dd

Browse files
committed
overlay node image before bootstrapping if necessary
As per openshift/enhancements#1637, we're trying to get rid of all OpenShift-versioned components from the bootimages. This means that there will no longer be oc, kubelet, or crio binaries for example, which bootstrapping obviously relies on. To adapt to this, the OpenShift installer now ships a new `node-image-overlay.service` in its bootstrap Ignition config. This service takes care of pulling down the node image and overlaying it, effectively updating the system to the node image version. Here, we accordingly also adapt assisted-installer so that we run `node-image-overlay.service` before starting e.g. `kubelet.service` and `bootkube.service`. See also: openshift/installer#8742
1 parent 5fabc5a commit f6996dd

File tree

2 files changed

+146
-0
lines changed

2 files changed

+146
-0
lines changed

src/installer/installer.go

+56
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,9 @@ const (
5151
singleNodeMasterIgnitionPath = "/opt/openshift/master.ign"
5252
waitingForMastersStatusInfo = "Waiting for masters to join bootstrap control plane"
5353
waitingForBootstrapToPrepare = "Waiting for bootstrap node preparation"
54+
nodeImagePullService = "node-image-pull.service"
55+
nodeImageOverlayService = "node-image-overlay.service"
56+
openshiftClientBin = "/usr/bin/oc"
5457
)
5558

5659
var generalWaitTimeout = 30 * time.Second
@@ -453,6 +456,30 @@ func (i *installer) startBootstrap() error {
453456
return err
454457
}
455458

459+
// If we're in a pure RHEL/CentOS environment, we need to overlay the node image
460+
// first to have access to e.g. oc, kubelet, cri-o, etc...
461+
// https://github.com/openshift/enhancements/pull/1637
462+
if !i.ops.FileExists(openshiftClientBin) {
463+
err = i.ops.SystemctlAction("start", "--no-block", nodeImagePullService, nodeImageOverlayService)
464+
if err != nil {
465+
return err
466+
}
467+
468+
if err = i.waitForActiveService(nodeImagePullService, context.Background()); err != nil {
469+
return err
470+
}
471+
472+
if err = i.waitForActiveService(nodeImageOverlayService, context.Background()); err != nil {
473+
return err
474+
}
475+
476+
// This is a sanity-check; the overlay was successful so we never expect this to
477+
// fail unless there's a bug somewhere.
478+
if !i.ops.FileExists(openshiftClientBin) {
479+
return errors.Errorf("%s successful but missing %s", nodeImageOverlayService, openshiftClientBin)
480+
}
481+
}
482+
456483
servicesToStart := []string{"bootkube.service", "approve-csr.service", "progress.service"}
457484
for _, service := range servicesToStart {
458485
err = i.ops.SystemctlAction("start", service)
@@ -669,6 +696,35 @@ func (i *installer) waitForBootkube(ctx context.Context) {
669696
}
670697
}
671698

699+
func (i *installer) waitForActiveService(service string, ctx context.Context) error {
700+
i.log.Infof("Waiting for %s to complete", service)
701+
702+
var rErr error
703+
waitErr := utils.WaitForPredicate(waitForeverTimeout, generalWaitInterval, func() bool {
704+
// Check if service has completed every 5 seconds. Use `show -P ActiveState`
705+
// instead of `is-active` because the latter has exit code semantics we don't want.
706+
if result, err := i.ops.ExecPrivilegeCommand(nil, "systemctl", "show", "-P", "ActiveState", service); err != nil {
707+
i.log.WithError(err).Warnf("error occurred checking state of %s", service)
708+
} else {
709+
i.log.Infof("%s status: %s", service, result)
710+
switch result {
711+
case "active":
712+
return true
713+
case "failed":
714+
out, _ := i.ops.ExecPrivilegeCommand(nil, "systemctl", "status", service)
715+
i.log.Info(out)
716+
rErr = errors.Errorf("service %s failed", service)
717+
return true
718+
default:
719+
break
720+
}
721+
}
722+
return false
723+
})
724+
725+
return stderrors.Join(rErr, waitErr)
726+
}
727+
672728
func (i *installer) waitForController(kc k8s_client.K8SClient) error {
673729
i.log.Infof("Waiting for controller to be ready")
674730
i.UpdateHostInstallProgress(models.HostStageWaitingForController, "waiting for controller pod ready event")

src/installer/installer_test.go

+90
Original file line numberDiff line numberDiff line change
@@ -271,6 +271,28 @@ var _ = Describe("installer HostRoleMaster role", func() {
271271
mockops.EXPECT().CreateRandomHostname(gomock.Any()).Return(nil).Times(1)
272272
}
273273
}
274+
// checkOcBinary registers exactly one expected FileExists(openshiftClientBin)
// call on the ops mock and makes that call return `exists`.
checkOcBinary := func(exists bool) {
275+
mockops.EXPECT().FileExists(openshiftClientBin).Return(exists).Times(1)
276+
}
277+
// checkOverlayService registers the ops-mock expectations for one sequence of
// `systemctl show -P ActiveState` checks against the unit `name`: a command
// error, then "inactive", then a final state. With injectError the final
// state is "failed" and a `systemctl status` call is expected as well;
// otherwise the final state is "active".
checkOverlayService := func(name string, injectError bool) {
	// expectActiveState registers a single `systemctl show` call for `name`
	// that yields the given state and error.
	expectActiveState := func(state string, err error) {
		mockops.EXPECT().ExecPrivilegeCommand(gomock.Any(), "systemctl", "show", "-P", "ActiveState", name).Return(state, err).Times(1)
	}

	// verify that we retry if `systemctl show` fails for some reason
	expectActiveState("", errors.New("bad"))
	// verify that we retry if service is still inactive (hasn't started yet)
	expectActiveState("inactive", nil)

	if injectError {
		// oh no! the service failed! the status output gets fetched as well
		expectActiveState("failed", nil)
		mockops.EXPECT().ExecPrivilegeCommand(gomock.Any(), "systemctl", "status", name).Return("status", nil).Times(1)
		return
	}

	// ok, succeed this time
	expectActiveState("active", nil)
}
291+
// overlayNodeImage registers the mock expectations for the node image overlay
// step: one non-blocking `systemctl start` of both the pull and the overlay
// service, followed by the state checks for each service in turn.
// The pull service is always set up to succeed; injectError only controls
// whether the overlay service ends up failed.
overlayNodeImage := func(injectError bool) {
292+
mockops.EXPECT().SystemctlAction("start", "--no-block", nodeImagePullService, nodeImageOverlayService).Return(nil).Times(1)
293+
checkOverlayService(nodeImagePullService, false)
294+
checkOverlayService(nodeImageOverlayService, injectError)
295+
}
274296
startServicesSuccess := func() {
275297
services := []string{"bootkube.service", "progress.service", "approve-csr.service"}
276298
for i := range services {
@@ -354,6 +376,7 @@ var _ = Describe("installer HostRoleMaster role", func() {
354376
checkLocalHostname("notlocalhost", nil)
355377
restartNetworkManager(nil)
356378
prepareControllerSuccess()
379+
checkOcBinary(true)
357380
startServicesSuccess()
358381
WaitMasterNodesSucccess()
359382
waitForBootkubeSuccess()
@@ -375,6 +398,63 @@ var _ = Describe("installer HostRoleMaster role", func() {
375398
ret := installerObj.InstallNode()
376399
Expect(ret).Should(BeNil())
377400
})
401+
It("bootstrap role happy flow on RHEL-only bootimage", func() {
	// Progress updates expected over the whole run.
	expectedStages := [][]string{
		{string(models.HostStageStartingInstallation), conf.Role},
		{string(models.HostStageWaitingForControlPlane), waitingForBootstrapToPrepare},
		{string(models.HostStageWaitingForControlPlane), waitingForMastersStatusInfo},
		{string(models.HostStageInstalling), string(models.HostRoleMaster)},
		{string(models.HostStageWritingImageToDisk)},
		{string(models.HostStageRebooting)},
	}
	updateProgressSuccess(expectedStages)

	bootstrapSetup()
	checkLocalHostname("notlocalhost", nil)
	restartNetworkManager(nil)
	prepareControllerSuccess()

	// oc is missing at first, so the node image gets overlaid; the second
	// lookup is the post-overlay sanity check and finds the binary.
	checkOcBinary(false)
	overlayNodeImage(false)
	checkOcBinary(true)

	startServicesSuccess()
	WaitMasterNodesSucccess()
	waitForBootkubeSuccess()
	bootkubeStatusSuccess()
	waitForETCDBootstrapSuccess()
	bootstrapETCDStatusSuccess()
	resolvConfSuccess()
	waitForControllerSuccessfully(conf.ClusterID)

	//HostRoleMaster flow:
	downloadHostIgnitionSuccess(infraEnvId, hostId, "master-host-id.ign")
	writeToDiskSuccess(gomock.Any())
	reportLogProgressSuccess()
	setBootOrderSuccess(gomock.Any())
	uploadLogsSuccess(true)
	ironicAgentDoesntExist()
	rebootSuccess()
	getEncapsulatedMcSuccess(nil)
	overwriteImageSuccess()

	Expect(installerObj.InstallNode()).Should(BeNil())
})
437+
It("bootstrap role fails on RHEL-only bootimage if can't overlay node image", func() {
	// Progress updates expected for this run.
	expectedStages := [][]string{
		{string(models.HostStageStartingInstallation), conf.Role},
		{string(models.HostStageWaitingForControlPlane), waitingForBootstrapToPrepare},
		{string(models.HostStageInstalling), string(models.HostRoleMaster)},
		{string(models.HostStageWritingImageToDisk)},
	}
	updateProgressSuccess(expectedStages)

	bootstrapSetup()
	checkLocalHostname("notlocalhost", nil)
	restartNetworkManager(nil)
	prepareControllerSuccess()

	// oc is missing and the overlay service ends up failed, so no second
	// lookup of the binary and no service start is expected.
	checkOcBinary(false)
	overlayNodeImage(true)

	//HostRoleMaster flow:
	downloadHostIgnitionSuccess(infraEnvId, hostId, "master-host-id.ign")
	writeToDiskSuccess(gomock.Any())
	setBootOrderSuccess(gomock.Any())
	getEncapsulatedMcSuccess(nil)
	overwriteImageSuccess()

	Expect(installerObj.InstallNode()).To(HaveOccurred())
})
378458
It("bootstrap role happy flow with invalid hostname", func() {
379459
updateProgressSuccess([][]string{{string(models.HostStageStartingInstallation), conf.Role},
380460
{string(models.HostStageWaitingForControlPlane), waitingForBootstrapToPrepare},
@@ -387,6 +467,7 @@ var _ = Describe("installer HostRoleMaster role", func() {
387467
checkLocalHostname("InvalidHostname", nil)
388468
restartNetworkManager(nil)
389469
prepareControllerSuccess()
470+
checkOcBinary(true)
390471
startServicesSuccess()
391472
WaitMasterNodesSucccess()
392473
waitForBootkubeSuccess()
@@ -420,6 +501,7 @@ var _ = Describe("installer HostRoleMaster role", func() {
420501
checkLocalHostname("localhost", nil)
421502
restartNetworkManager(nil)
422503
prepareControllerSuccess()
504+
checkOcBinary(true)
423505
startServicesSuccess()
424506
WaitMasterNodesSucccess()
425507
waitForBootkubeSuccess()
@@ -454,6 +536,7 @@ var _ = Describe("installer HostRoleMaster role", func() {
454536
checkLocalHostname("notlocalhost", nil)
455537
restartNetworkManager(nil)
456538
prepareControllerSuccess()
539+
checkOcBinary(true)
457540
startServicesSuccess()
458541
WaitMasterNodesSucccessWithCluster(&models.Cluster{
459542
Platform: &models.Platform{
@@ -520,6 +603,7 @@ var _ = Describe("installer HostRoleMaster role", func() {
520603
checkLocalHostname("notlocalhost", nil)
521604
restartNetworkManager(nil)
522605
prepareControllerSuccess()
606+
checkOcBinary(true)
523607
startServicesSuccess()
524608
WaitMasterNodesSucccess()
525609
waitForBootkubeSuccess()
@@ -633,6 +717,7 @@ var _ = Describe("installer HostRoleMaster role", func() {
633717
checkLocalHostname("notlocalhost", nil)
634718
restartNetworkManager(nil)
635719
prepareControllerSuccess()
720+
checkOcBinary(true)
636721
startServicesSuccess()
637722

638723
mockbmclient.EXPECT().GetEnabledHostsNamesHosts(gomock.Any(), gomock.Any()).Return(inventoryNamesHost, nil).AnyTimes()
@@ -1037,6 +1122,9 @@ var _ = Describe("installer HostRoleMaster role", func() {
10371122
mockops.EXPECT().CreateRandomHostname(gomock.Any()).Return(nil).Times(1)
10381123
}
10391124
}
1125+
// checkOcBinary registers exactly one expected FileExists(openshiftClientBin)
// call on the ops mock and makes that call return `exists`.
checkOcBinary := func(exists bool) {
1126+
mockops.EXPECT().FileExists(openshiftClientBin).Return(exists).Times(1)
1127+
}
10401128
startServicesSuccess := func() {
10411129
services := []string{"bootkube.service", "progress.service", "approve-csr.service"}
10421130
for i := range services {
@@ -1079,6 +1167,7 @@ var _ = Describe("installer HostRoleMaster role", func() {
10791167
singleNodeBootstrapSetup()
10801168
checkLocalHostname("localhost", nil)
10811169
prepareControllerSuccess()
1170+
checkOcBinary(true)
10821171
startServicesSuccess()
10831172
waitForBootkubeSuccess()
10841173
bootkubeStatusSuccess()
@@ -1116,6 +1205,7 @@ var _ = Describe("installer HostRoleMaster role", func() {
11161205
singleNodeBootstrapSetup()
11171206
checkLocalHostname("localhost", nil)
11181207
prepareControllerSuccess()
1208+
checkOcBinary(true)
11191209
startServicesSuccess()
11201210
waitForBootkubeSuccess()
11211211
bootkubeStatusSuccess()

0 commit comments

Comments
 (0)