Skip to content

Commit ed9bc20

Browse files
asm582openshift-merge-robot
authored andcommitted
remove threads, fix backoff
1 parent 96ab8fa commit ed9bc20

File tree

1 file changed

+18
-4
lines changed

1 file changed

+18
-4
lines changed

Diff for: pkg/controller/queuejob/queuejob_controller_ex.go

+18-4
Original file line numberDiff line numberDiff line change
@@ -466,7 +466,7 @@ func (qjm *XController) PreemptQueueJobs() {
466466
// Only back-off AWs that are in state running and not in state Failed
467467
if updateNewJob.Status.State != arbv1.AppWrapperStateFailed {
468468
klog.Infof("[PreemptQueueJobs] Adding preempted AppWrapper %s/%s to back off queue.", aw.Name, aw.Namespace)
469-
go qjm.backoff(ctx, updateNewJob, "PreemptionTriggered", string(message))
469+
qjm.backoff(ctx, updateNewJob, "PreemptionTriggered", string(message))
470470
}
471471
}
472472
}
@@ -1155,7 +1155,7 @@ func (qjm *XController) ScheduleNext(qj *arbv1.AppWrapper) {
11551155
} else {
11561156
dispatchFailedMessage = "Cannot find an cluster with enough resources to dispatch AppWrapper."
11571157
klog.V(2).Infof("[ScheduleNex] [Dispatcher Mode] %s %s\n", dispatchFailedReason, dispatchFailedMessage)
1158-
go qjm.backoff(ctx, qj, dispatchFailedReason, dispatchFailedMessage)
1158+
qjm.backoff(ctx, qj, dispatchFailedReason, dispatchFailedMessage)
11591159
}
11601160
} else { // Agent Mode
11611161
aggqj := qjm.GetAggregatedResources(qj)
@@ -1284,7 +1284,7 @@ func (qjm *XController) ScheduleNext(qj *arbv1.AppWrapper) {
12841284
// TODO: Remove forwarded logic as a big AW will never be forwarded
12851285
forwarded = true
12861286
// should we call backoff or update etcd?
1287-
go qjm.backoff(ctx, qj, dispatchFailedReason, dispatchFailedMessage)
1287+
qjm.backoff(ctx, qj, dispatchFailedReason, dispatchFailedMessage)
12881288
}
12891289
}
12901290
forwarded = true
@@ -1347,7 +1347,7 @@ func (qjm *XController) ScheduleNext(qj *arbv1.AppWrapper) {
13471347
if qjm.quotaManager != nil && quotaFits {
13481348
qjm.quotaManager.Release(qj)
13491349
}
1350-
go qjm.backoff(ctx, qj, dispatchFailedReason, dispatchFailedMessage)
1350+
qjm.backoff(ctx, qj, dispatchFailedReason, dispatchFailedMessage)
13511351
}
13521352
}
13531353
return nil
@@ -1672,6 +1672,20 @@ func (cc *XController) updateQueueJob(oldObj, newObj interface{}) {
16721672
}
16731673

16741674
klog.V(6).Infof("[Informer-updateQJ] '%s/%s' *Delay=%.6f seconds normal enqueue Version=%s Status=%v", newQJ.Namespace, newQJ.Name, time.Now().Sub(newQJ.Status.ControllerFirstTimestamp.Time).Seconds(), newQJ.ResourceVersion, newQJ.Status)
1675+
for _, cond := range newQJ.Status.Conditions {
1676+
if cond.Type == arbv1.AppWrapperCondBackoff {
1677+
//AWs that have backoff conditions have a delay of 10 seconds before getting added to enqueue.
1678+
//TODO: we could plug an interface here with back-off strategies for different MCAD use cases.
1679+
time.AfterFunc(time.Duration(cc.serverOption.BackoffTime)*time.Second, func() {
1680+
if cc.serverOption.QuotaEnabled && cc.quotaManager != nil {
1681+
cc.quotaManager.Release(newQJ)
1682+
}
1683+
cc.enqueue(newQJ)
1684+
})
1685+
return
1686+
}
1687+
}
1688+
16751689
// cc.eventQueue.Delete(oldObj)
16761690
cc.enqueue(newQJ)
16771691
}

0 commit comments

Comments
 (0)