
Commit 591c75e

Merge pull request kubernetes#128694 from tallclair/revert-127300-error-propagation

Revert "[FG:InPlacePodVerticalScaling] kubelet: Propagate error in doPodResizeAction() to the caller"

2 parents: fd66693 + 1d822f1

5 files changed: +32 -254 lines

pkg/kubelet/container/sync_result.go (-12)

@@ -45,12 +45,6 @@ var (
 	ErrConfigPodSandbox = errors.New("ConfigPodSandboxError")
 	// ErrKillPodSandbox returned when runtime failed to stop pod's sandbox.
 	ErrKillPodSandbox = errors.New("KillPodSandboxError")
-	// ErrUpdatePodSandbox returned when runtime failed to update the pod's sandbox config.
-	ErrUpdatePodSandbox = errors.New("UpdatePodSandboxError")
-	// ErrUpdateContainerMemory returned when runtime failed to update the pod's container config.
-	ErrUpdateContainerMemory = errors.New("UpdateContainerMemoryError")
-	// ErrUpdateContainerCPU returned when runtime failed to update the pod's container config.
-	ErrUpdateContainerCPU = errors.New("UpdateContainerCPUError")
 )

 // SyncAction indicates different kind of actions in SyncPod() and KillPod(). Now there are only actions

@@ -74,12 +68,6 @@ const (
 	ConfigPodSandbox SyncAction = "ConfigPodSandbox"
 	// KillPodSandbox action
 	KillPodSandbox SyncAction = "KillPodSandbox"
-	// UpdatePodSandbox action
-	UpdatePodSandbox SyncAction = "UpdatePodSandbox"
-	// UpdateContainerMemory action
-	UpdateContainerMemory SyncAction = "UpdateContainerMemory"
-	// UpdateContainerCPU action
-	UpdateContainerCPU SyncAction = "UpdateContainerCPU"
 )

 // SyncResult is the result of sync action.
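For orientation, this is the pattern the deleted errors and actions plugged into: a SyncResult pairs one action with its target and records why it failed. Below is a minimal, self-contained Go sketch of that pattern using simplified stand-in types, not the actual kubecontainer API:

package main

import (
	"errors"
	"fmt"
)

// SyncAction and SyncResult are simplified stand-ins for the kubecontainer
// types edited above; they model the pattern, not the real API surface.
type SyncAction string

type SyncResult struct {
	Action  SyncAction
	Target  interface{}
	Error   error
	Message string
}

// NewSyncResult starts a result for one action against one target (e.g. a pod UID).
func NewSyncResult(action SyncAction, target interface{}) *SyncResult {
	return &SyncResult{Action: action, Target: target}
}

// Fail records a sentinel error plus a human-readable message, mirroring how
// the kubelet pairs errors like ErrKillPodSandbox with context.
func (r *SyncResult) Fail(err error, msg string) {
	r.Error = err
	r.Message = msg
}

func main() {
	killResult := NewSyncResult("KillPodSandbox", "pod-uid-1234")
	killResult.Fail(errors.New("KillPodSandboxError"), "runtime refused to stop sandbox")
	fmt.Printf("%s on %v failed: %v (%s)\n", killResult.Action, killResult.Target, killResult.Error, killResult.Message)
}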

pkg/kubelet/kuberuntime/kuberuntime_container.go (+2 -1)

@@ -392,11 +392,12 @@ func (m *kubeGenericRuntimeManager) generateContainerConfig(ctx context.Context,
 	return config, cleanupAction, nil
 }

-func (m *kubeGenericRuntimeManager) updateContainerResources(ctx context.Context, pod *v1.Pod, container *v1.Container, containerID kubecontainer.ContainerID) error {
+func (m *kubeGenericRuntimeManager) updateContainerResources(pod *v1.Pod, container *v1.Container, containerID kubecontainer.ContainerID) error {
 	containerResources := m.generateContainerResources(pod, container)
 	if containerResources == nil {
 		return fmt.Errorf("container %q updateContainerResources failed: cannot generate resources config", containerID.String())
 	}
+	ctx := context.Background()
 	err := m.runtimeService.UpdateContainerResources(ctx, containerID.ID, containerResources)
 	if err != nil {
 		klog.ErrorS(err, "UpdateContainerResources failed", "container", containerID.String())

pkg/kubelet/kuberuntime/kuberuntime_container_test.go (+1 -1)

@@ -963,7 +963,7 @@ func TestUpdateContainerResources(t *testing.T) {
 	assert.NoError(t, err)
 	containerID := cStatus[0].ID

-	err = m.updateContainerResources(ctx, pod, &pod.Spec.Containers[0], containerID)
+	err = m.updateContainerResources(pod, &pod.Spec.Containers[0], containerID)
 	assert.NoError(t, err)

 	// Verify container is updated

pkg/kubelet/kuberuntime/kuberuntime_manager.go (+27 -37)

@@ -666,14 +666,13 @@ func (m *kubeGenericRuntimeManager) computePodResizeAction(pod *v1.Pod, containe
 		return true
 	}

-func (m *kubeGenericRuntimeManager) doPodResizeAction(ctx context.Context, pod *v1.Pod, podStatus *kubecontainer.PodStatus, podContainerChanges podActions) (result kubecontainer.PodSyncResult) {
-	updatePodResult := kubecontainer.NewSyncResult(kubecontainer.UpdatePodSandbox, pod.UID)
-	result.AddSyncResult(updatePodResult)
+func (m *kubeGenericRuntimeManager) doPodResizeAction(pod *v1.Pod, podStatus *kubecontainer.PodStatus, podContainerChanges podActions, result kubecontainer.PodSyncResult) {
 	pcm := m.containerManager.NewPodContainerManager()
 	//TODO(vinaykul,InPlacePodVerticalScaling): Figure out best way to get enforceMemoryQoS value (parameter #4 below) in platform-agnostic way
 	podResources := cm.ResourceConfigForPod(pod, m.cpuCFSQuota, uint64((m.cpuCFSQuotaPeriod.Duration)/time.Microsecond), false)
 	if podResources == nil {
-		result.Fail(fmt.Errorf("unable to get resource configuration processing resize for pod %s", format.Pod(pod)))
+		klog.ErrorS(nil, "Unable to get resource configuration", "pod", pod.Name)
+		result.Fail(fmt.Errorf("unable to get resource configuration processing resize for pod %s", pod.Name))
 		return
 	}
 	setPodCgroupConfig := func(rName v1.ResourceName, setLimitValue bool) error {
@@ -691,7 +690,10 @@ func (m *kubeGenericRuntimeManager) doPodResizeAction(ctx context.Context, pod *
 		case v1.ResourceMemory:
 			err = pcm.SetPodCgroupConfig(pod, podResources)
 		}
-		return fmt.Errorf("failed to set cgroup config for %s of pod %s: %w", rName, format.Pod(pod), err)
+		if err != nil {
+			klog.ErrorS(err, "Failed to set cgroup config", "resource", rName, "pod", pod.Name)
+		}
+		return err
 	}
 	// Memory and CPU are updated separately because memory resizes may be ordered differently than CPU resizes.
 	// If resize results in net pod resource increase, set pod cgroup config before resizing containers.
@@ -711,12 +713,9 @@ func (m *kubeGenericRuntimeManager) doPodResizeAction(ctx context.Context, pod *
 		}
 	}
 	if len(podContainerChanges.ContainersToUpdate[rName]) > 0 {
-		updateContainerResults, errUpdate := m.updatePodContainerResources(ctx, pod, rName, podContainerChanges.ContainersToUpdate[rName])
-		for _, containerResult := range updateContainerResults {
-			result.AddSyncResult(containerResult)
-		}
-		if errUpdate != nil {
-			return errUpdate
+		if err = m.updatePodContainerResources(pod, rName, podContainerChanges.ContainersToUpdate[rName]); err != nil {
+			klog.ErrorS(err, "updatePodContainerResources failed", "pod", format.Pod(pod), "resource", rName)
+			return err
 		}
 	}
 	// At downsizing, requests should shrink prior to limits in order to keep "requests <= limits".
@@ -734,21 +733,25 @@ func (m *kubeGenericRuntimeManager) doPodResizeAction(ctx context.Context, pod *
 	}
 	if len(podContainerChanges.ContainersToUpdate[v1.ResourceMemory]) > 0 || podContainerChanges.UpdatePodResources {
 		if podResources.Memory == nil {
-			result.Fail(fmt.Errorf("podResources.Memory is nil for pod %s", format.Pod(pod)))
+			klog.ErrorS(nil, "podResources.Memory is nil", "pod", pod.Name)
+			result.Fail(fmt.Errorf("podResources.Memory is nil for pod %s", pod.Name))
 			return
 		}
 		currentPodMemoryConfig, err := pcm.GetPodCgroupConfig(pod, v1.ResourceMemory)
 		if err != nil {
-			result.Fail(fmt.Errorf("GetPodCgroupConfig for memory failed for pod %s: %w", format.Pod(pod), err))
+			klog.ErrorS(err, "GetPodCgroupConfig for memory failed", "pod", pod.Name)
+			result.Fail(err)
 			return
 		}
 		currentPodMemoryUsage, err := pcm.GetPodCgroupMemoryUsage(pod)
 		if err != nil {
-			result.Fail(fmt.Errorf("GetPodCgroupMemoryUsage failed for pod %s", format.Pod(pod)))
+			klog.ErrorS(err, "GetPodCgroupMemoryUsage failed", "pod", pod.Name)
+			result.Fail(err)
 			return
 		}
 		if currentPodMemoryUsage >= uint64(*podResources.Memory) {
-			result.Fail(fmt.Errorf("aborting attempt to set pod memory limit less than current memory usage for pod %s", format.Pod(pod)))
+			klog.ErrorS(nil, "Aborting attempt to set pod memory limit less than current memory usage", "pod", pod.Name)
+			result.Fail(fmt.Errorf("aborting attempt to set pod memory limit less than current memory usage for pod %s", pod.Name))
 			return
 		}
 		if errResize := resizeContainers(v1.ResourceMemory, int64(*currentPodMemoryConfig.Memory), *podResources.Memory, 0, 0); errResize != nil {
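The hunk above keeps the guard that aborts the resize rather than set a pod memory limit at or below current usage. A minimal self-contained sketch of that check, with illustrative names rather than the kubelet's actual types:

package main

import (
	"errors"
	"fmt"
)

// canLowerMemoryLimit mirrors the guard above: refuse to set a new memory
// limit at or below the cgroup's current usage, which could trigger an OOM kill.
func canLowerMemoryLimit(currentUsage uint64, desiredLimit int64) error {
	if desiredLimit <= 0 || currentUsage >= uint64(desiredLimit) {
		return errors.New("aborting: desired memory limit is not above current usage")
	}
	return nil
}

func main() {
	// Usage 900 MiB, desired limit 512 MiB: the resize must be rejected.
	if err := canLowerMemoryLimit(900<<20, 512<<20); err != nil {
		fmt.Println(err)
	}
	// Usage 300 MiB, desired limit 512 MiB: safe to proceed (prints <nil>).
	fmt.Println(canLowerMemoryLimit(300<<20, 512<<20))
}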
@@ -758,12 +761,14 @@ func (m *kubeGenericRuntimeManager) doPodResizeAction(ctx context.Context, pod *
 	}
 	if len(podContainerChanges.ContainersToUpdate[v1.ResourceCPU]) > 0 || podContainerChanges.UpdatePodResources {
 		if podResources.CPUQuota == nil || podResources.CPUShares == nil {
-			result.Fail(fmt.Errorf("podResources.CPUQuota or podResources.CPUShares is nil for pod %s", format.Pod(pod)))
+			klog.ErrorS(nil, "podResources.CPUQuota or podResources.CPUShares is nil", "pod", pod.Name)
+			result.Fail(fmt.Errorf("podResources.CPUQuota or podResources.CPUShares is nil for pod %s", pod.Name))
 			return
 		}
 		currentPodCpuConfig, err := pcm.GetPodCgroupConfig(pod, v1.ResourceCPU)
 		if err != nil {
-			result.Fail(fmt.Errorf("GetPodCgroupConfig for CPU failed for pod %s", format.Pod(pod)))
+			klog.ErrorS(err, "GetPodCgroupConfig for CPU failed", "pod", pod.Name)
+			result.Fail(err)
 			return
 		}
 		if errResize := resizeContainers(v1.ResourceCPU, *currentPodCpuConfig.CPUQuota, *podResources.CPUQuota,
@@ -772,20 +777,17 @@ func (m *kubeGenericRuntimeManager) doPodResizeAction(ctx context.Context, pod *
 			return
 		}
 	}
-	return
 }

-func (m *kubeGenericRuntimeManager) updatePodContainerResources(ctx context.Context, pod *v1.Pod, resourceName v1.ResourceName, containersToUpdate []containerToUpdateInfo) (updateResults []*kubecontainer.SyncResult, err error) {
+func (m *kubeGenericRuntimeManager) updatePodContainerResources(pod *v1.Pod, resourceName v1.ResourceName, containersToUpdate []containerToUpdateInfo) error {
 	klog.V(5).InfoS("Updating container resources", "pod", klog.KObj(pod))

 	for _, cInfo := range containersToUpdate {
-		var updateContainerResult *kubecontainer.SyncResult
 		container := pod.Spec.Containers[cInfo.apiContainerIdx].DeepCopy()
 		// If updating memory limit, use most recently configured CPU request and limit values.
 		// If updating CPU request and limit, use most recently configured memory request and limit values.
 		switch resourceName {
 		case v1.ResourceMemory:
-			updateContainerResult = kubecontainer.NewSyncResult(kubecontainer.UpdateContainerMemory, container.Name)
 			container.Resources.Limits = v1.ResourceList{
 				v1.ResourceCPU:    *resource.NewMilliQuantity(cInfo.currentContainerResources.cpuLimit, resource.DecimalSI),
 				v1.ResourceMemory: *resource.NewQuantity(cInfo.desiredContainerResources.memoryLimit, resource.BinarySI),
@@ -795,7 +797,6 @@ func (m *kubeGenericRuntimeManager) updatePodContainerResources(ctx context.Cont
 				v1.ResourceMemory: *resource.NewQuantity(cInfo.desiredContainerResources.memoryRequest, resource.BinarySI),
 			}
 		case v1.ResourceCPU:
-			updateContainerResult = kubecontainer.NewSyncResult(kubecontainer.UpdateContainerCPU, container.Name)
 			container.Resources.Limits = v1.ResourceList{
 				v1.ResourceCPU:    *resource.NewMilliQuantity(cInfo.desiredContainerResources.cpuLimit, resource.DecimalSI),
 				v1.ResourceMemory: *resource.NewQuantity(cInfo.currentContainerResources.memoryLimit, resource.BinarySI),
@@ -805,19 +806,12 @@ func (m *kubeGenericRuntimeManager) updatePodContainerResources(ctx context.Cont
 				v1.ResourceMemory: *resource.NewQuantity(cInfo.currentContainerResources.memoryRequest, resource.BinarySI),
 			}
 		}
-		updateResults = append(updateResults, updateContainerResult)
-		if err = m.updateContainerResources(ctx, pod, container, cInfo.kubeContainerID); err != nil {
+		if err := m.updateContainerResources(pod, container, cInfo.kubeContainerID); err != nil {
 			// Log error and abort as container updates need to succeed in the order determined by computePodResizeAction.
 			// The recovery path is for SyncPod to keep retrying at later times until it succeeds.
 			klog.ErrorS(err, "updateContainerResources failed", "container", container.Name, "cID", cInfo.kubeContainerID,
 				"pod", format.Pod(pod), "resourceName", resourceName)
-			switch resourceName {
-			case v1.ResourceMemory:
-				updateContainerResult.Fail(kubecontainer.ErrUpdateContainerMemory, err.Error())
-			case v1.ResourceCPU:
-				updateContainerResult.Fail(kubecontainer.ErrUpdateContainerCPU, err.Error())
-			}
-			return
+			return err
 		}
 		resizeKey := fmt.Sprintf("%s:resize:%s", container.Name, resourceName)

@@ -857,7 +851,7 @@ func (m *kubeGenericRuntimeManager) updatePodContainerResources(ctx context.Cont
 			cInfo.currentContainerResources.cpuRequest = cInfo.desiredContainerResources.cpuRequest
 		}
 	}
-	return
+	return nil
 }

 // computePodActions checks whether the pod spec has changed and returns the changes if true.
@@ -1357,11 +1351,7 @@ func (m *kubeGenericRuntimeManager) SyncPod(ctx context.Context, pod *v1.Pod, po
 	// Step 7: For containers in podContainerChanges.ContainersToUpdate[CPU,Memory] list, invoke UpdateContainerResources
 	if IsInPlacePodVerticalScalingAllowed(pod) {
 		if len(podContainerChanges.ContainersToUpdate) > 0 || podContainerChanges.UpdatePodResources {
-			resizeResult := m.doPodResizeAction(ctx, pod, podStatus, podContainerChanges)
-			result.AddPodSyncResult(resizeResult)
-			if resizeResult.Error() != nil {
-				klog.ErrorS(resizeResult.Error(), "doPodResizeAction failed")
-			}
+			m.doPodResizeAction(pod, podStatus, podContainerChanges, result)
 		}
 	}

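After the revert, doPodResizeAction no longer returns a PodSyncResult of its own; it records failures on the result aggregate that SyncPod passes in and returns early. A simplified, self-contained model of that flow; the types are stand-ins for pkg/kubelet/container, and the aggregate is passed by pointer here for clarity:

package main

import (
	"errors"
	"fmt"
)

// PodSyncResult is a simplified stand-in for kubecontainer.PodSyncResult.
type PodSyncResult struct {
	SyncError error
}

// Fail records the error that aborted the sync, keeping the first one seen.
func (r *PodSyncResult) Fail(err error) {
	if r.SyncError == nil {
		r.SyncError = err
	}
}

// doResize models the post-revert style: mutate the shared result and
// return early on failure instead of returning an error to the caller.
func doResize(result *PodSyncResult, haveConfig bool) {
	if !haveConfig {
		result.Fail(errors.New("unable to get resource configuration"))
		return
	}
	// ... apply pod cgroup and container updates ...
}

func main() {
	var result PodSyncResult
	doResize(&result, false)
	fmt.Println("sync error:", result.SyncError)
}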
