Skip to content

Commit 329ad7b

Browse files
authoredAug 24, 2023
Fixing Unloading Kmod on a previously failed Load Pod (#530)
This PR fixes 2 issues: 1) In case Load Pod is failing, its Config in status is nil. if NMC is configured to un load the module, for whatever reason, the code must also check if the Config is nil, and then just to delete the status for module, in order to avoid panic 2) When unload Pod is failing, its status should be retained as is, otherwise the Config is lost, and the following unload pod won't be able to run
1 parent d3510c6 commit 329ad7b

File tree

3 files changed

+45
-14
lines changed

3 files changed

+45
-14
lines changed
 

‎internal/controllers/nmc_reconciler.go

+23-11
Original file line numberDiff line numberDiff line change
@@ -274,7 +274,7 @@ func (w *workerHelperImpl) ProcessModuleSpec(
274274

275275
func (w *workerHelperImpl) ProcessOrphanModuleStatus(
276276
ctx context.Context,
277-
nmc *kmmv1beta1.NodeModulesConfig,
277+
nmcObj *kmmv1beta1.NodeModulesConfig,
278278
status *kmmv1beta1.NodeModuleStatus,
279279
) error {
280280
logger := ctrl.LoggerFrom(ctx)
@@ -284,9 +284,16 @@ func (w *workerHelperImpl) ProcessOrphanModuleStatus(
284284
return nil
285285
}
286286

287+
if status.Config == nil {
288+
logger.Info("Missing status config and pod is not running: previously failed pod, no need to unload")
289+
patchFrom := client.MergeFrom(nmcObj.DeepCopy())
290+
nmc.RemoveModuleStatus(&nmcObj.Status.Modules, status.Namespace, status.Name)
291+
return w.client.Status().Patch(ctx, nmcObj, patchFrom)
292+
}
293+
287294
logger.Info("Creating unloader Pod")
288295

289-
return w.pm.CreateUnloaderPod(ctx, nmc, status)
296+
return w.pm.CreateUnloaderPod(ctx, nmcObj, status)
290297
}
291298

292299
func (w *workerHelperImpl) RemoveOrphanFinalizers(ctx context.Context, nodeName string) error {
@@ -347,27 +354,32 @@ func (w *workerHelperImpl) SyncStatus(ctx context.Context, nmcObj *kmmv1beta1.No
347354

348355
logger.Info("Processing worker Pod")
349356

350-
status := kmmv1beta1.NodeModuleStatus{
351-
Namespace: modNamespace,
352-
Name: modName,
357+
status := nmc.FindModuleStatus(nmcObj.Status.Modules, modNamespace, modName)
358+
if status == nil {
359+
status = &kmmv1beta1.NodeModuleStatus{
360+
Namespace: modNamespace,
361+
Name: modName,
362+
}
353363
}
354364

355365
deletePod := false
356-
statusDeleted := false
366+
updateModuleStatus := false
357367

358368
switch phase {
359369
case v1.PodFailed:
370+
status.InProgress = false
360371
deletePod = true
372+
updateModuleStatus = true
361373
case v1.PodSucceeded:
362374
deletePod = true
363375

364376
if p.Labels[actionLabelKey] == WorkerActionUnload {
365377
nmc.RemoveModuleStatus(&nmcObj.Status.Modules, modNamespace, modName)
366-
deletePod = true
367-
statusDeleted = true
368378
break
369379
}
370380

381+
updateModuleStatus = true
382+
371383
config := kmmv1beta1.ModuleConfig{}
372384

373385
if err = yaml.UnmarshalStrict([]byte(p.Annotations[configAnnotationKey]), &config); err != nil {
@@ -388,9 +400,9 @@ func (w *workerHelperImpl) SyncStatus(ctx context.Context, nmcObj *kmmv1beta1.No
388400

389401
status.LastTransitionTime = &podLTT
390402

391-
deletePod = true
392403
case v1.PodPending, v1.PodRunning:
393404
status.InProgress = true
405+
updateModuleStatus = true
394406
// TODO: if the NMC's spec changed compared to the Pod's config, recreate the Pod
395407
default:
396408
errs = append(
@@ -410,8 +422,8 @@ func (w *workerHelperImpl) SyncStatus(ctx context.Context, nmcObj *kmmv1beta1.No
410422
}
411423
}
412424

413-
if !statusDeleted {
414-
nmc.SetModuleStatus(&nmcObj.Status.Modules, status)
425+
if updateModuleStatus {
426+
nmc.SetModuleStatus(&nmcObj.Status.Modules, *status)
415427
}
416428
}
417429

‎internal/controllers/nmc_reconciler_test.go

+20-1
Original file line numberDiff line numberDiff line change
@@ -355,14 +355,33 @@ var _ = Describe("workerHelper_ProcessOrphanModuleStatus", func() {
355355
)
356356
})
357357

358+
It("should remove status in case Config is nil", func() {
359+
nmc := &kmmv1beta1.NodeModulesConfig{}
360+
status := &kmmv1beta1.NodeModuleStatus{}
361+
362+
ctrl := gomock.NewController(GinkgoT())
363+
client := testclient.NewMockClient(ctrl)
364+
sw := testclient.NewMockStatusWriter(ctrl)
365+
client.EXPECT().Status().Return(sw)
366+
sw.EXPECT().Patch(ctx, nmc, gomock.Any())
367+
Expect(
368+
NewWorkerHelper(client, nil).ProcessOrphanModuleStatus(ctx, nmc, status),
369+
).NotTo(
370+
HaveOccurred(),
371+
)
372+
})
373+
358374
It("should create an unloader Pod", func() {
359375
ctrl := gomock.NewController(GinkgoT())
360376
client := testclient.NewMockClient(ctrl)
377+
361378
pm := NewMockpodManager(ctrl)
362379
wh := NewWorkerHelper(client, pm)
363380

364381
nmc := &kmmv1beta1.NodeModulesConfig{}
365-
status := &kmmv1beta1.NodeModuleStatus{}
382+
status := &kmmv1beta1.NodeModuleStatus{
383+
Config: &kmmv1beta1.ModuleConfig{},
384+
}
366385

367386
pm.EXPECT().CreateUnloaderPod(ctx, nmc, status)
368387

‎internal/nmc/helper.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ func (h *helper) GetModuleEntry(nmc *kmmv1beta1.NodeModulesConfig, modNamespace,
7575
return nil, 0
7676
}
7777

78-
func findModuleStatus(statuses []kmmv1beta1.NodeModuleStatus, moduleNamespace, moduleName string) *kmmv1beta1.NodeModuleStatus {
78+
func FindModuleStatus(statuses []kmmv1beta1.NodeModuleStatus, moduleNamespace, moduleName string) *kmmv1beta1.NodeModuleStatus {
7979
for i := 0; i < len(statuses); i++ {
8080
s := statuses[i]
8181

@@ -108,7 +108,7 @@ func SetModuleStatus(statuses *[]kmmv1beta1.NodeModuleStatus, status kmmv1beta1.
108108
return
109109
}
110110

111-
s := findModuleStatus(*statuses, status.Namespace, status.Name)
111+
s := FindModuleStatus(*statuses, status.Namespace, status.Name)
112112

113113
if s != nil {
114114
*s = status

0 commit comments

Comments
 (0)
Please sign in to comment.