Skip to content

Commit 3a2555e

Browse files
committed
Emit events for resize error states
1 parent 61e6242 commit 3a2555e

File tree

2 files changed

+32
-12
lines changed

2 files changed

+32
-12
lines changed

pkg/kubelet/events/event.go

+2
Original file line number | Diff line number | Diff line change
@@ -34,6 +34,8 @@ const (
3434
FailedToCreatePodContainer = "FailedCreatePodContainer"
3535
FailedToMakePodDataDirectories = "Failed"
3636
NetworkNotReady = "NetworkNotReady"
37+
ResizeDeferred = "ResizeDeferred"
38+
ResizeInfeasible = "ResizeInfeasible"
3739
)
3840

3941
// Image event reason list

pkg/kubelet/kubelet.go

+30 -12
Original file line number | Diff line number | Diff line change
@@ -118,6 +118,7 @@ import (
118118
kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
119119
"k8s.io/kubernetes/pkg/kubelet/userns"
120120
"k8s.io/kubernetes/pkg/kubelet/util"
121+
"k8s.io/kubernetes/pkg/kubelet/util/format"
121122
"k8s.io/kubernetes/pkg/kubelet/util/manager"
122123
"k8s.io/kubernetes/pkg/kubelet/util/queue"
123124
"k8s.io/kubernetes/pkg/kubelet/util/sliceutils"
@@ -2829,38 +2830,47 @@ func isPodResizeInProgress(pod *v1.Pod, podStatus *kubecontainer.PodStatus) bool
28292830
// canResizePod determines if the requested resize is currently feasible.
28302831
// pod should hold the desired (pre-allocated) spec.
28312832
// Returns true if the resize can proceed.
2832-
func (kl *Kubelet) canResizePod(pod *v1.Pod) (bool, v1.PodResizeStatus) {
2833+
func (kl *Kubelet) canResizePod(pod *v1.Pod) (bool, v1.PodResizeStatus, string) {
28332834
if goos == "windows" {
2834-
return false, v1.PodResizeStatusInfeasible
2835+
return false, v1.PodResizeStatusInfeasible, "Resizing Windows pods is not supported"
28352836
}
28362837

28372838
if v1qos.GetPodQOS(pod) == v1.PodQOSGuaranteed && !utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScalingExclusiveCPUs) {
28382839
if utilfeature.DefaultFeatureGate.Enabled(features.CPUManager) {
28392840
if kl.containerManager.GetNodeConfig().CPUManagerPolicy == "static" {
2840-
klog.V(3).InfoS("Resize is infeasible for Guaranteed Pods alongside CPU Manager static policy")
2841-
return false, v1.PodResizeStatusInfeasible
2841+
msg := "Resize is infeasible for Guaranteed Pods alongside CPU Manager static policy"
2842+
klog.V(3).InfoS(msg, "pod", format.Pod(pod))
2843+
return false, v1.PodResizeStatusInfeasible, msg
28422844
}
28432845
}
28442846
if utilfeature.DefaultFeatureGate.Enabled(features.MemoryManager) {
28452847
if kl.containerManager.GetNodeConfig().ExperimentalMemoryManagerPolicy == "static" {
2846-
klog.V(3).InfoS("Resize is infeasible for Guaranteed Pods alongside Memory Manager static policy")
2847-
return false, v1.PodResizeStatusInfeasible
2848+
msg := "Resize is infeasible for Guaranteed Pods alongside Memory Manager static policy"
2849+
klog.V(3).InfoS(msg, "pod", format.Pod(pod))
2850+
return false, v1.PodResizeStatusInfeasible, msg
28482851
}
28492852
}
28502853
}
28512854

28522855
node, err := kl.getNodeAnyWay()
28532856
if err != nil {
28542857
klog.ErrorS(err, "getNodeAnyway function failed")
2855-
return false, ""
2858+
return false, "", ""
28562859
}
28572860
cpuAvailable := node.Status.Allocatable.Cpu().MilliValue()
28582861
memAvailable := node.Status.Allocatable.Memory().Value()
28592862
cpuRequests := resource.GetResourceRequest(pod, v1.ResourceCPU)
28602863
memRequests := resource.GetResourceRequest(pod, v1.ResourceMemory)
28612864
if cpuRequests > cpuAvailable || memRequests > memAvailable {
2862-
klog.V(3).InfoS("Resize is not feasible as request exceeds allocatable node resources", "pod", klog.KObj(pod))
2863-
return false, v1.PodResizeStatusInfeasible
2865+
var msg string
2866+
if memRequests > memAvailable {
2867+
msg = fmt.Sprintf("memory, requested: %d, capacity: %d", memRequests, memAvailable)
2868+
} else {
2869+
msg = fmt.Sprintf("cpu, requested: %d, capacity: %d", cpuRequests, cpuAvailable)
2870+
}
2871+
msg = "Node didn't have enough capacity: " + msg
2872+
klog.V(3).InfoS(msg, "pod", klog.KObj(pod))
2873+
return false, v1.PodResizeStatusInfeasible, msg
28642874
}
28652875

28662876
// Treat the existing pod needing resize as a new pod with desired resources seeking admit.
@@ -2871,10 +2881,10 @@ func (kl *Kubelet) canResizePod(pod *v1.Pod) (bool, v1.PodResizeStatus) {
28712881
if ok, failReason, failMessage := kl.canAdmitPod(allocatedPods, pod); !ok {
28722882
// Log reason and return. Let the next sync iteration retry the resize
28732883
klog.V(3).InfoS("Resize cannot be accommodated", "pod", klog.KObj(pod), "reason", failReason, "message", failMessage)
2874-
return false, v1.PodResizeStatusDeferred
2884+
return false, v1.PodResizeStatusDeferred, failMessage
28752885
}
28762886

2877-
return true, v1.PodResizeStatusInProgress
2887+
return true, v1.PodResizeStatusInProgress, ""
28782888
}
28792889

28802890
// handlePodResourcesResize returns the "allocated pod", which should be used for all resource
@@ -2899,7 +2909,7 @@ func (kl *Kubelet) handlePodResourcesResize(pod *v1.Pod, podStatus *kubecontaine
28992909
kl.podResizeMutex.Lock()
29002910
defer kl.podResizeMutex.Unlock()
29012911
// Desired resources != allocated resources. Can we update the allocation to the desired resources?
2902-
fit, resizeStatus := kl.canResizePod(pod)
2912+
fit, resizeStatus, resizeMsg := kl.canResizePod(pod)
29032913
if fit {
29042914
// Update pod resource allocation checkpoint
29052915
if err := kl.statusManager.SetPodAllocation(pod); err != nil {
@@ -2925,6 +2935,14 @@ func (kl *Kubelet) handlePodResourcesResize(pod *v1.Pod, podStatus *kubecontaine
29252935
}
29262936
if resizeStatus != "" {
29272937
kl.statusManager.SetPodResizeStatus(pod.UID, resizeStatus)
2938+
if resizeMsg != "" {
2939+
switch resizeStatus {
2940+
case v1.PodResizeStatusDeferred:
2941+
kl.recorder.Eventf(pod, v1.EventTypeWarning, events.ResizeDeferred, resizeMsg)
2942+
case v1.PodResizeStatusInfeasible:
2943+
kl.recorder.Eventf(pod, v1.EventTypeWarning, events.ResizeInfeasible, resizeMsg)
2944+
}
2945+
}
29282946
}
29292947
return allocatedPod, nil
29302948
}

0 commit comments

Comments
 (0)