You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
klog.V(6).Infof("[getAggAvaiResPri] %s: AW %s cannot run, adding any dangling pod resources %v while it being preempted.", time.Now().String(), value.Name, totalResource)
909
+
preemptable=preemptable.Add(totalResource)
908
910
continue
909
911
} elseifvalue.Status.SystemPriority<targetpr {
910
912
// Dispatcher Mode: Ensure this job is part of the target cluster
// Don't count the resources that can run but not yet realized (job orchestration pending or partially running).
943
+
} elseifvalue.Status.CanRun {
944
+
qjv:=clusterstateapi.EmptyResource()
943
945
for_, resctrl:=rangeqjm.qjobResControls {
944
-
qjv:=resctrl.GetAggregatedResources(value)
945
-
pending=pending.Add(qjv)
946
+
res:=resctrl.GetAggregatedResources(value)
947
+
qjv.Add(res)
946
948
klog.V(10).Infof("[getAggAvaiResPri] Subtract all resources %+v in resctrlType=%T for job %s which can-run is set to: %v but state is still pending.", qjv, resctrl, value.Name, value.Status.CanRun)
klog.V(10).Infof("[getAggAvaiResPri] Subtract all resources %+v in resctrlType=%T for job %s which can-run is set to: %v but state is still pending.", qjv, genericItem, value.Name, value.Status.CanRun)
klog.V(10).Infof("[getAggAvaiResPri] Subtract all resources %+v in genericItem=%T for job %s which can-run is set to: %v but state is still pending.", qjv, genericItem, value.Name, value.Status.CanRun)
//Don't count partially running jobs with pods still pending.
958
-
for_, resctrl:=rangeqjm.qjobResControls {
959
-
qjv:=resctrl.GetAggregatedResources(value)
960
-
pending=pending.Add(qjv)
961
-
klog.V(4).Infof("[getAggAvaiResPri] Subtract all resources %+v in resctrlType=%T for job %s which can-run is set to: %v and status set to: %s but no pod counts in the state have been defined.", qjv, resctrl, value.Name, value.Status.CanRun, value.Status.State)
klog.V(4).Infof("[getAggAvaiResPri] Subtract all resources %+v in resctrlType=%T for job %s which can-run is set to: %v and status set to: %s but no pod counts in the state have been defined.", qjv, genericItem, value.Name, value.Status.CanRun, value.Status.State)
967
-
}
968
-
969
-
} else {
970
-
// TODO: Hack to handle race condition when Running jobs have not yet updated the pod counts (In-Flight AW Jobs)
971
-
// This hack uses the golang struct implied behavior of defining the object without a value. In this case
972
-
// of using 'int32', no value and a value of 0 are the same.
// If pod counts for AW have not updated within the timeout window, account for
983
-
// this object's resources to give the object controller more time to start creating
984
-
// pods. This matters when resources are scarce. Once the timeout expires,
985
-
// resources for this object will not be held and other AW may be dispatched which
986
-
// could consume resources initially allocated for this object. This is to handle
987
-
// object controllers (essentially custom resource controllers) that do not work as
988
-
// expected by creating pods.
989
-
ifqjm.waitForPodCountUpdates(dispatchedCond) {
990
-
for_, resctrl:=rangeqjm.qjobResControls {
991
-
qjv:=resctrl.GetAggregatedResources(value)
992
-
pending=pending.Add(qjv)
993
-
klog.V(4).Infof("[getAggAvaiResPri] Subtract all resources %+v in resctrlType=%T for job %s which can-run is set to: %v and status set to: %s but no pod counts in the state have been defined.", qjv, resctrl, value.Name, value.Status.CanRun, value.Status.State)
klog.V(4).Infof("[getAggAvaiResPri] Subtract all resources %+v in resctrlType=%T for job %s which can-run is set to: %v and status set to: %s but no pod counts in the state have been defined.", qjv, genericItem, value.Name, value.Status.CanRun, value.Status.State)
999
-
}
1000
-
} else {
1001
-
klog.V(4).Infof("[getAggAvaiResPri] Resources will no longer be reserved for %s/%s due to timeout of %d ms for pod creating.", value.Name, value.Namespace, qjm.serverOption.DispatchResourceReservationTimeout)
0 commit comments