Skip to content

Commit 58d89a4

Browse files
Merge pull request #17000 from mfojtik/dc-timeout
Automatic merge from submit-queue (batch tested with PRs 17020, 17026, 17000, 17010). apps: a deployment config stuck in the new state should respect timeoutSeconds. Fixes: #16962. With this patch, the deployment config controller will mark the deployment as failed (timeout) once timeoutSeconds has elapsed and the deployment is still in the 'new' state. This generally happens when the deployment is not able to create the deployer pod (for example, because of quota). We should not wait indefinitely for quota to become available.
2 parents 6cf2dbd + 81ab37e commit 58d89a4

File tree

5 files changed

+182
-0
lines changed

5 files changed

+182
-0
lines changed

pkg/apps/apis/apps/types.go

+1
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,7 @@ const (
113113
DeploymentCancelledNewerDeploymentExists = "newer deployment was found running"
114114
DeploymentFailedUnrelatedDeploymentExists = "unrelated pod with the same name as this deployment is already running"
115115
DeploymentFailedDeployerPodNoLongerExists = "deployer pod no longer exists"
116+
DeploymentFailedUnableToCreateDeployerPod = "unable to create deployer pod"
116117
)
117118

118119
// DeploymentStatus describes the possible states a deployment can be in.

pkg/apps/controller/deployer/deployer_controller.go

+15
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,21 @@ func (c *DeploymentController) handle(deployment *v1.ReplicationController, will
127127
}
128128
break
129129
}
130+
// In case the deployment is stuck in "new" state because we fail to create
131+
// deployer pod (quota, etc..) we should respect the timeoutSeconds in the
132+
// config strategy and transition the rollout to failed instead of waiting for
133+
// the deployment pod forever.
134+
config, err := deployutil.DecodeDeploymentConfig(deployment, c.codec)
135+
if err != nil {
136+
return err
137+
}
138+
if deployutil.RolloutExceededTimeoutSeconds(config, deployment) {
139+
nextStatus = deployapi.DeploymentStatusFailed
140+
updatedAnnotations[deployapi.DeploymentStatusReasonAnnotation] = deployapi.DeploymentFailedUnableToCreateDeployerPod
141+
c.emitDeploymentEvent(deployment, v1.EventTypeWarning, "RolloutTimeout", fmt.Sprintf("Rollout for %q failed to create deployer pod (timeoutSeconds: %ds)", deployutil.LabelForDeploymentV1(deployment), deployutil.GetTimeoutSecondsForStrategy(config)))
142+
glog.V(4).Infof("Failing deployment %s/%s as we reached timeout while waiting for the deployer pod to be created", deployment.Namespace, deployment.Name)
143+
break
144+
}
130145

131146
switch {
132147
case kerrors.IsNotFound(deployerErr):

pkg/apps/controller/deployer/deployer_controller_test.go

+16
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,7 @@ func TestHandle_createPodOk(t *testing.T) {
138138
deployment, _ := deployutil.MakeDeploymentV1(config, codec)
139139
deployment.Annotations[deployapi.DeploymentStatusAnnotation] = string(deployapi.DeploymentStatusNew)
140140
deployment.Spec.Template.Spec.NodeSelector = map[string]string{"labelKey1": "labelValue1", "labelKey2": "labelValue2"}
141+
deployment.CreationTimestamp = metav1.Now()
141142

142143
controller := okDeploymentController(client, nil, nil, true, v1.PodUnknown)
143144

@@ -227,6 +228,7 @@ func TestHandle_createPodFail(t *testing.T) {
227228
config := deploytest.OkDeploymentConfig(1)
228229
deployment, _ := deployutil.MakeDeploymentV1(config, codec)
229230
deployment.Annotations[deployapi.DeploymentStatusAnnotation] = string(deployapi.DeploymentStatusNew)
231+
deployment.CreationTimestamp = metav1.Now()
230232

231233
controller := okDeploymentController(client, nil, nil, true, v1.PodUnknown)
232234

@@ -282,6 +284,7 @@ func TestHandle_deployerPodAlreadyExists(t *testing.T) {
282284
config := deploytest.OkDeploymentConfig(1)
283285
deployment, _ := deployutil.MakeDeploymentV1(config, codec)
284286
deployment.Annotations[deployapi.DeploymentStatusAnnotation] = string(deployapi.DeploymentStatusNew)
287+
deployment.CreationTimestamp = metav1.Now()
285288
deployerPodName := deployutil.DeployerPodNameForDeployment(deployment.Name)
286289

287290
client := &fake.Clientset{}
@@ -321,6 +324,7 @@ func TestHandle_unrelatedPodAlreadyExists(t *testing.T) {
321324

322325
config := deploytest.OkDeploymentConfig(1)
323326
deployment, _ := deployutil.MakeDeploymentV1(config, codec)
327+
deployment.CreationTimestamp = metav1.Now()
324328
deployment.Annotations[deployapi.DeploymentStatusAnnotation] = string(deployapi.DeploymentStatusNew)
325329

326330
client := &fake.Clientset{}
@@ -362,6 +366,7 @@ func TestHandle_unrelatedPodAlreadyExistsTestScaled(t *testing.T) {
362366
config := deploytest.TestDeploymentConfig(deploytest.OkDeploymentConfig(1))
363367
deployment, _ := deployutil.MakeDeploymentV1(config, codec)
364368
deployment.Annotations[deployapi.DeploymentStatusAnnotation] = string(deployapi.DeploymentStatusNew)
369+
deployment.CreationTimestamp = metav1.Now()
365370
one := int32(1)
366371
deployment.Spec.Replicas = &one
367372

@@ -433,6 +438,7 @@ func TestHandle_noop(t *testing.T) {
433438

434439
deployment, _ := deployutil.MakeDeploymentV1(deploytest.OkDeploymentConfig(1), codec)
435440
deployment.Annotations[deployapi.DeploymentStatusAnnotation] = string(test.deploymentPhase)
441+
deployment.CreationTimestamp = metav1.Now()
436442

437443
controller := okDeploymentController(client, deployment, nil, true, test.podPhase)
438444

@@ -476,6 +482,7 @@ func TestHandle_failedTest(t *testing.T) {
476482
// Verify successful cleanup
477483
config := deploytest.TestDeploymentConfig(deploytest.OkDeploymentConfig(1))
478484
deployment, _ := deployutil.MakeDeploymentV1(config, codec)
485+
deployment.CreationTimestamp = metav1.Now()
479486
one := int32(1)
480487
deployment.Spec.Replicas = &one
481488
deployment.Annotations[deployapi.DeploymentStatusAnnotation] = string(deployapi.DeploymentStatusRunning)
@@ -519,6 +526,7 @@ func TestHandle_cleanupPodOk(t *testing.T) {
519526
config := deploytest.OkDeploymentConfig(1)
520527
deployment, _ := deployutil.MakeDeploymentV1(config, codec)
521528
deployment.Annotations[deployapi.DeploymentStatusAnnotation] = string(deployapi.DeploymentStatusComplete)
529+
deployment.CreationTimestamp = metav1.Now()
522530

523531
controller := okDeploymentController(client, deployment, hookPods, true, v1.PodSucceeded)
524532
hookPods = append(hookPods, deployment.Name)
@@ -562,6 +570,7 @@ func TestHandle_cleanupPodOkTest(t *testing.T) {
562570
// Verify successful cleanup
563571
config := deploytest.TestDeploymentConfig(deploytest.OkDeploymentConfig(1))
564572
deployment, _ := deployutil.MakeDeploymentV1(config, codec)
573+
deployment.CreationTimestamp = metav1.Now()
565574
one := int32(1)
566575
deployment.Spec.Replicas = &one
567576
deployment.Annotations[deployapi.DeploymentStatusAnnotation] = string(deployapi.DeploymentStatusRunning)
@@ -606,6 +615,7 @@ func TestHandle_cleanupPodNoop(t *testing.T) {
606615
// Verify no-op
607616
config := deploytest.OkDeploymentConfig(1)
608617
deployment, _ := deployutil.MakeDeploymentV1(config, codec)
618+
deployment.CreationTimestamp = metav1.Now()
609619
deployment.Annotations[deployapi.DeploymentStatusAnnotation] = string(deployapi.DeploymentStatusComplete)
610620

611621
controller := okDeploymentController(client, deployment, nil, true, v1.PodSucceeded)
@@ -637,6 +647,7 @@ func TestHandle_cleanupPodFail(t *testing.T) {
637647
// Verify error
638648
config := deploytest.OkDeploymentConfig(1)
639649
deployment, _ := deployutil.MakeDeploymentV1(config, codec)
650+
deployment.CreationTimestamp = metav1.Now()
640651
deployment.Annotations[deployapi.DeploymentStatusAnnotation] = string(deployapi.DeploymentStatusComplete)
641652

642653
controller := okDeploymentController(client, deployment, nil, true, v1.PodSucceeded)
@@ -667,6 +678,7 @@ func TestHandle_cancelNew(t *testing.T) {
667678
})
668679

669680
deployment, _ := deployutil.MakeDeploymentV1(deploytest.OkDeploymentConfig(1), codec)
681+
deployment.CreationTimestamp = metav1.Now()
670682
deployment.Annotations[deployapi.DeploymentStatusAnnotation] = string(deployapi.DeploymentStatusNew)
671683
deployment.Annotations[deployapi.DeploymentCancelledAnnotation] = deployapi.DeploymentCancelledAnnotationValue
672684

@@ -688,6 +700,7 @@ func TestHandle_cleanupNewWithDeployers(t *testing.T) {
688700
deletedDeployer := false
689701

690702
deployment, _ := deployutil.MakeDeploymentV1(deploytest.OkDeploymentConfig(1), codec)
703+
deployment.CreationTimestamp = metav1.Now()
691704
deployment.Annotations[deployapi.DeploymentStatusAnnotation] = string(deployapi.DeploymentStatusNew)
692705
deployment.Annotations[deployapi.DeploymentCancelledAnnotation] = deployapi.DeploymentCancelledAnnotationValue
693706

@@ -782,6 +795,7 @@ func TestHandle_cleanupPostNew(t *testing.T) {
782795
})
783796

784797
deployment, _ := deployutil.MakeDeploymentV1(deploytest.OkDeploymentConfig(1), codec)
798+
deployment.CreationTimestamp = metav1.Now()
785799
deployment.Annotations[deployapi.DeploymentCancelledAnnotation] = deployapi.DeploymentCancelledAnnotationValue
786800
deployment.Annotations[deployapi.DeploymentStatusAnnotation] = string(test.deploymentPhase)
787801

@@ -845,6 +859,7 @@ func TestHandle_deployerPodDisappeared(t *testing.T) {
845859
continue
846860
}
847861
deployment.Annotations[deployapi.DeploymentStatusAnnotation] = string(test.phase)
862+
deployment.CreationTimestamp = metav1.Now()
848863
updatedDeployment = deployment
849864

850865
controller := okDeploymentController(client, nil, nil, true, v1.PodUnknown)
@@ -980,6 +995,7 @@ func TestHandle_transitionFromDeployer(t *testing.T) {
980995

981996
deployment, _ := deployutil.MakeDeploymentV1(deploytest.OkDeploymentConfig(1), codec)
982997
deployment.Annotations[deployapi.DeploymentStatusAnnotation] = string(test.deploymentPhase)
998+
deployment.CreationTimestamp = metav1.Now()
983999

9841000
controller := okDeploymentController(client, deployment, nil, true, test.podPhase)
9851001

pkg/apps/util/util.go

+42
Original file line numberDiff line numberDiff line change
@@ -652,6 +652,12 @@ func IsTerminatedDeployment(deployment runtime.Object) bool {
652652
return IsCompleteDeployment(deployment) || IsFailedDeployment(deployment)
653653
}
654654

655+
// IsNewDeployment returns true if the passed deployment is in new state.
656+
func IsNewDeployment(deployment runtime.Object) bool {
657+
current := DeploymentStatusFor(deployment)
658+
return current == deployapi.DeploymentStatusNew
659+
}
660+
655661
// IsCompleteDeployment returns true if the passed deployment is in state complete.
656662
func IsCompleteDeployment(deployment runtime.Object) bool {
657663
current := DeploymentStatusFor(deployment)
@@ -782,6 +788,42 @@ func DeploymentsForCleanup(configuration *deployapi.DeploymentConfig, deployment
782788
return relevantDeployments
783789
}
784790

791+
// GetTimeoutSecondsForStrategy returns the timeout in seconds defined in the
792+
// deployment config strategy.
793+
func GetTimeoutSecondsForStrategy(config *deployapi.DeploymentConfig) int64 {
794+
var timeoutSeconds int64
795+
switch config.Spec.Strategy.Type {
796+
case deployapi.DeploymentStrategyTypeRolling:
797+
timeoutSeconds = deployapi.DefaultRollingTimeoutSeconds
798+
if t := config.Spec.Strategy.RollingParams.TimeoutSeconds; t != nil {
799+
timeoutSeconds = *t
800+
}
801+
case deployapi.DeploymentStrategyTypeRecreate:
802+
timeoutSeconds = deployapi.DefaultRecreateTimeoutSeconds
803+
if t := config.Spec.Strategy.RecreateParams.TimeoutSeconds; t != nil {
804+
timeoutSeconds = *t
805+
}
806+
case deployapi.DeploymentStrategyTypeCustom:
807+
timeoutSeconds = deployapi.DefaultRecreateTimeoutSeconds
808+
}
809+
return timeoutSeconds
810+
}
811+
812+
// RolloutExceededTimeoutSeconds returns true if the current deployment exceeded
813+
// the timeoutSeconds defined for its strategy.
814+
// Note that this is different than activeDeadlineSeconds which is the timeout
815+
// set for the deployer pod. In some cases, the deployer pod cannot be created
816+
// (like quota, etc...). In that case deployer controller use this function to
817+
// measure if the created deployment (RC) exceeded the timeout.
818+
func RolloutExceededTimeoutSeconds(config *deployapi.DeploymentConfig, latestRC *v1.ReplicationController) bool {
819+
timeoutSeconds := GetTimeoutSecondsForStrategy(config)
820+
// If user set the timeoutSeconds to 0, we assume there should be no timeout.
821+
if timeoutSeconds <= 0 {
822+
return false
823+
}
824+
return int64(time.Since(latestRC.CreationTimestamp.Time).Seconds()) > timeoutSeconds
825+
}
826+
785827
// WaitForRunningDeployerPod waits a given period of time until the deployer pod
786828
// for given replication controller is not running.
787829
func WaitForRunningDeployerPod(podClient kcoreclient.PodsGetter, rc *api.ReplicationController, timeout time.Duration) error {

pkg/apps/util/util_test.go

+108
Original file line numberDiff line numberDiff line change
@@ -586,3 +586,111 @@ func TestRemoveCondition(t *testing.T) {
586586
}
587587
}
588588
}
589+
590+
func TestRolloutExceededTimeoutSeconds(t *testing.T) {
591+
now := time.Now()
592+
tests := []struct {
593+
name string
594+
config *deployapi.DeploymentConfig
595+
deploymentCreationTime time.Time
596+
expectTimeout bool
597+
}{
598+
// Recreate strategy with deployment running for 20s (exceeding 10s timeout)
599+
{
600+
name: "recreate timeout",
601+
config: func(timeoutSeconds int64) *deployapi.DeploymentConfig {
602+
config := deploytest.OkDeploymentConfig(1)
603+
config.Spec.Strategy.RecreateParams.TimeoutSeconds = &timeoutSeconds
604+
return config
605+
}(int64(10)),
606+
deploymentCreationTime: now.Add(-20 * time.Second),
607+
expectTimeout: true,
608+
},
609+
// Recreate strategy with no timeout
610+
{
611+
name: "recreate no timeout",
612+
config: func(timeoutSeconds int64) *deployapi.DeploymentConfig {
613+
config := deploytest.OkDeploymentConfig(1)
614+
config.Spec.Strategy.RecreateParams.TimeoutSeconds = &timeoutSeconds
615+
return config
616+
}(int64(0)),
617+
deploymentCreationTime: now.Add(-700 * time.Second),
618+
expectTimeout: false,
619+
},
620+
621+
// Rolling strategy with deployment running for 20s (exceeding 10s timeout)
622+
{
623+
name: "rolling timeout",
624+
config: func(timeoutSeconds int64) *deployapi.DeploymentConfig {
625+
config := deploytest.OkDeploymentConfig(1)
626+
config.Spec.Strategy = deploytest.OkRollingStrategy()
627+
config.Spec.Strategy.RollingParams.TimeoutSeconds = &timeoutSeconds
628+
return config
629+
}(int64(10)),
630+
deploymentCreationTime: now.Add(-20 * time.Second),
631+
expectTimeout: true,
632+
},
633+
// Rolling strategy with deployment with no timeout specified.
634+
{
635+
name: "rolling using default timeout",
636+
config: func(timeoutSeconds int64) *deployapi.DeploymentConfig {
637+
config := deploytest.OkDeploymentConfig(1)
638+
config.Spec.Strategy = deploytest.OkRollingStrategy()
639+
config.Spec.Strategy.RollingParams.TimeoutSeconds = nil
640+
return config
641+
}(0),
642+
deploymentCreationTime: now.Add(-20 * time.Second),
643+
expectTimeout: false,
644+
},
645+
// Recreate strategy with deployment with no timeout specified.
646+
{
647+
name: "recreate using default timeout",
648+
config: func(timeoutSeconds int64) *deployapi.DeploymentConfig {
649+
config := deploytest.OkDeploymentConfig(1)
650+
config.Spec.Strategy.RecreateParams.TimeoutSeconds = nil
651+
return config
652+
}(0),
653+
deploymentCreationTime: now.Add(-20 * time.Second),
654+
expectTimeout: false,
655+
},
656+
// Custom strategy with deployment with no timeout specified.
657+
{
658+
name: "custom using default timeout",
659+
config: func(timeoutSeconds int64) *deployapi.DeploymentConfig {
660+
config := deploytest.OkDeploymentConfig(1)
661+
config.Spec.Strategy = deploytest.OkCustomStrategy()
662+
return config
663+
}(0),
664+
deploymentCreationTime: now.Add(-20 * time.Second),
665+
expectTimeout: false,
666+
},
667+
// Custom strategy use default timeout exceeding it.
668+
{
669+
name: "custom using default timeout timing out",
670+
config: func(timeoutSeconds int64) *deployapi.DeploymentConfig {
671+
config := deploytest.OkDeploymentConfig(1)
672+
config.Spec.Strategy = deploytest.OkCustomStrategy()
673+
return config
674+
}(0),
675+
deploymentCreationTime: now.Add(-700 * time.Second),
676+
expectTimeout: true,
677+
},
678+
}
679+
680+
for _, tc := range tests {
681+
config := tc.config
682+
deployment, err := MakeDeploymentV1(config, kapi.Codecs.LegacyCodec(deployv1.SchemeGroupVersion))
683+
if err != nil {
684+
t.Fatalf("unexpected error: %v", err)
685+
}
686+
deployment.ObjectMeta.CreationTimestamp = metav1.Time{Time: tc.deploymentCreationTime}
687+
gotTimeout := RolloutExceededTimeoutSeconds(config, deployment)
688+
if tc.expectTimeout && !gotTimeout {
689+
t.Errorf("[%s]: expected timeout, but got no timeout", tc.name)
690+
}
691+
if !tc.expectTimeout && gotTimeout {
692+
t.Errorf("[%s]: expected no timeout, but got timeout", tc.name)
693+
}
694+
695+
}
696+
}

0 commit comments

Comments
 (0)