Skip to content

Commit 6f6eba6

Browse files
authored
extend resetOrFail logic to better support desired autopilot semantics (#201)
1 parent bfb50a5 commit 6f6eba6

File tree

1 file changed

+8
-8
lines changed

1 file changed

+8
-8
lines changed

internal/controller/appwrapper/appwrapper_controller.go

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -219,7 +219,7 @@ func (r *AppWrapperReconciler) Reconcile(ctx context.Context, req ctrl.Request)
219219
if fatal {
220220
return r.updateStatus(ctx, aw, workloadv1beta2.AppWrapperFailed) // always move to failed on fatal error
221221
} else {
222-
return r.resetOrFail(ctx, aw, false)
222+
return r.resetOrFail(ctx, aw, false, 1)
223223
}
224224
}
225225
return r.updateStatus(ctx, aw, workloadv1beta2.AppWrapperRunning)
@@ -259,7 +259,7 @@ func (r *AppWrapperReconciler) Reconcile(ctx context.Context, req ctrl.Request)
259259
Reason: "FailedComponent",
260260
Message: fmt.Sprintf("Found %v failed components", compStatus.failed),
261261
})
262-
return r.resetOrFail(ctx, aw, podStatus.terminalFailure)
262+
return r.resetOrFail(ctx, aw, podStatus.terminalFailure, 1)
263263
}
264264

265265
// Handle Success
@@ -297,7 +297,7 @@ func (r *AppWrapperReconciler) Reconcile(ctx context.Context, req ctrl.Request)
297297
if now.Before(deadline) {
298298
return ctrl.Result{RequeueAfter: deadline.Sub(now)}, r.Status().Update(ctx, aw)
299299
} else {
300-
return r.resetOrFail(ctx, aw, podStatus.terminalFailure)
300+
return r.resetOrFail(ctx, aw, podStatus.terminalFailure, 1)
301301
}
302302
}
303303

@@ -309,8 +309,8 @@ func (r *AppWrapperReconciler) Reconcile(ctx context.Context, req ctrl.Request)
309309
Reason: "AutopilotUnhealthy",
310310
Message: fmt.Sprintf("Workload contains pods using unhealthy resources on Nodes: %v", podStatus.unhealthyNodes),
311311
})
312-
// Go to reset directly because an Autopilot triggered evacuation does not count against the retry limit
313-
return r.updateStatus(ctx, aw, workloadv1beta2.AppWrapperResetting)
312+
// Autopilot triggered evacuation does not increment retry count
313+
return r.resetOrFail(ctx, aw, false, 0)
314314
}
315315

316316
clearCondition(aw, workloadv1beta2.Unhealthy, "FoundNoFailedPods", "")
@@ -344,7 +344,7 @@ func (r *AppWrapperReconciler) Reconcile(ctx context.Context, req ctrl.Request)
344344
Reason: "InsufficientPodsReady",
345345
Message: podDetailsMessage,
346346
})
347-
return r.resetOrFail(ctx, aw, podStatus.terminalFailure)
347+
return r.resetOrFail(ctx, aw, podStatus.terminalFailure, 1)
348348
}
349349

350350
case workloadv1beta2.AppWrapperSuspending: // undeploying components
@@ -487,10 +487,10 @@ func (r *AppWrapperReconciler) updateStatus(ctx context.Context, aw *workloadv1b
487487
return ctrl.Result{}, nil
488488
}
489489

490-
func (r *AppWrapperReconciler) resetOrFail(ctx context.Context, aw *workloadv1beta2.AppWrapper, terminalFailure bool) (ctrl.Result, error) {
490+
func (r *AppWrapperReconciler) resetOrFail(ctx context.Context, aw *workloadv1beta2.AppWrapper, terminalFailure bool, retryIncrement int32) (ctrl.Result, error) {
491491
maxRetries := r.retryLimit(ctx, aw)
492492
if !terminalFailure && aw.Status.Retries < maxRetries {
493-
aw.Status.Retries += 1
493+
aw.Status.Retries += retryIncrement
494494
return r.updateStatus(ctx, aw, workloadv1beta2.AppWrapperResetting)
495495
} else {
496496
return r.updateStatus(ctx, aw, workloadv1beta2.AppWrapperFailed)

0 commit comments

Comments
 (0)