Skip to content

Commit dca96d4

Browse files
authored
Increasing test timeouts and disabling stop tests (#235)
Issue #, if available: Description of changes: The Monitoring Schedule can sometimes reach the Scheduled state before the controller can verify that it is in the Pending state. This PR removes the check for Pending/resource synced. It also increases timeouts and disables the test_*_stopped tests. By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license.
1 parent 7626546 commit dca96d4

7 files changed

+10
-29
lines changed

test/e2e/tests/test_feature_group.py

+1-7
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@
3838
FEATURE_GROUP_STATUS_CREATING = "Creating"
3939
FEATURE_GROUP_STATUS_CREATED = "Created"
4040
# longer wait is used because we sometimes see server taking time to create/delete
41-
WAIT_PERIOD_COUNT = 8
41+
WAIT_PERIOD_COUNT = 9
4242
WAIT_PERIOD_LENGTH = 30
4343
STATUS = "status"
4444
RESOURCE_STATUS = "featureGroupStatus"
@@ -144,12 +144,6 @@ def test_create_feature_group(self, feature_group):
144144

145145
assert k8s.get_resource_arn(resource) == feature_group_arn
146146

147-
assert (
148-
feature_group_sm_desc["FeatureGroupStatus"] == FEATURE_GROUP_STATUS_CREATING
149-
)
150-
151-
assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "False")
152-
153147
self._assert_feature_group_status_in_sync(
154148
feature_group_name, reference, FEATURE_GROUP_STATUS_CREATED
155149
)

test/e2e/tests/test_hpo.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,6 @@ def get_hpo_resource_status(reference: k8s.CustomResourceReference):
8686

8787

8888
@service_marker
89-
@pytest.mark.canary
9089
class TestHPO:
9190
def _wait_resource_hpo_status(
9291
self,
@@ -153,6 +152,7 @@ def test_stopped(self, xgboost_hpojob):
153152
hpo_sm_desc["HyperParameterTuningJobStatus"] in cfg.LIST_JOB_STATUS_STOPPED
154153
)
155154

155+
@pytest.mark.canary
156156
def test_completed(self, xgboost_hpojob):
157157
(reference, resource) = xgboost_hpojob
158158
assert k8s.get_resource_exists(reference)

test/e2e/tests/test_model_package.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,7 @@ def _wait_resource_model_package_status(
132132
self,
133133
reference: k8s.CustomResourceReference,
134134
expected_status: str,
135-
wait_periods: int = 30,
135+
wait_periods: int = 32,
136136
period_length: int = 30,
137137
):
138138
return wait_for_status(
@@ -147,7 +147,7 @@ def _wait_sagemaker_model_package_status(
147147
self,
148148
model_package_name,
149149
expected_status: str,
150-
wait_periods: int = 30,
150+
wait_periods: int = 32,
151151
period_length: int = 30,
152152
):
153153
return wait_for_status(

test/e2e/tests/test_monitoring_schedule.py

+1-14
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,7 @@ def _assert_monitoring_schedule_status_in_sync(
141141
schedule_name,
142142
reference,
143143
expected_status,
144-
wait_periods: int = 6,
144+
wait_periods: int = 7,
145145
period_length: int = 30,
146146
):
147147
assert (
@@ -165,19 +165,6 @@ def test_smoke(
165165
monitoring_schedule_arn = monitoring_schedule_desc["MonitoringScheduleArn"]
166166
assert k8s.get_resource_arn(resource) == monitoring_schedule_arn
167167

168-
# scheule transitions Pending -> Scheduled state
169-
# Pending status is shortlived only for 30 seconds because baselining job has already been run
170-
# remove the checks for Pending status if the test is flaky because of this
171-
# as the main objective is to test for Scheduled status
172-
# OR
173-
# create the schedule with a on-going baseline job where it waits for the baselining job to complete
174-
assert (
175-
wait_resource_monitoring_schedule_status(
176-
reference, self.STATUS_PENDING, 5, 2
177-
)
178-
== self.STATUS_PENDING
179-
)
180-
assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "False", 5, 2)
181168

182169
self._assert_monitoring_schedule_status_in_sync(
183170
sagemaker_client, monitoring_schedule_name, reference, self.STATUS_SCHEDULED

test/e2e/tests/test_processingjob.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,6 @@ def get_processing_resource_status(reference: k8s.CustomResourceReference):
8686

8787

8888
@service_marker
89-
@pytest.mark.canary
9089
class TestProcessingJob:
9190
def _wait_resource_processing_status(
9291
self,
@@ -152,6 +151,7 @@ def test_stopped(self, kmeans_processing_job):
152151
processing_job_desc = get_sagemaker_processing_job(processing_job_name)
153152
assert processing_job_desc["ProcessingJobStatus"] in cfg.LIST_JOB_STATUS_STOPPED
154153

154+
@pytest.mark.canary
155155
def test_completed(self, kmeans_processing_job):
156156
(reference, resource) = kmeans_processing_job
157157
assert k8s.get_resource_exists(reference)

test/e2e/tests/test_trainingjob.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,6 @@ def xgboost_training_job():
5858
)
5959

6060

61-
@pytest.mark.canary
6261
@service_marker
6362
class TestTrainingJob:
6463
def test_stopped(self, xgboost_training_job):
@@ -86,6 +85,7 @@ def test_stopped(self, xgboost_training_job):
8685
training_job_desc = get_sagemaker_training_job(training_job_name)
8786
assert training_job_desc["TrainingJobStatus"] in cfg.LIST_JOB_STATUS_STOPPED
8887

88+
@pytest.mark.canary
8989
def test_completed(self, xgboost_training_job):
9090
(reference, resource) = xgboost_training_job
9191
assert k8s.get_resource_exists(reference)

test/e2e/tests/test_transformjob.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -123,13 +123,12 @@ def get_transform_resource_status(reference: k8s.CustomResourceReference):
123123

124124

125125
@service_marker
126-
@pytest.mark.canary
127126
class TestTransformJob:
128127
def _wait_resource_transform_status(
129128
self,
130129
reference: k8s.CustomResourceReference,
131130
expected_status: str,
132-
wait_periods: int = 30,
131+
wait_periods: int = 32,
133132
period_length: int = 30,
134133
):
135134
return wait_for_status(
@@ -144,7 +143,7 @@ def _wait_sagemaker_transform_status(
144143
self,
145144
transform_job_name,
146145
expected_status: str,
147-
wait_periods: int = 30,
146+
wait_periods: int = 32,
148147
period_length: int = 30,
149148
):
150149
return wait_for_status(
@@ -188,6 +187,7 @@ def test_stopped(self, xgboost_transformjob):
188187
transform_sm_desc = get_sagemaker_transform_job(transform_job_name)
189188
assert transform_sm_desc["TransformJobStatus"] in cfg.LIST_JOB_STATUS_STOPPED
190189

190+
@pytest.mark.canary
191191
def test_completed(self, xgboost_transformjob):
192192
(reference, resource) = xgboost_transformjob
193193
assert k8s.get_resource_exists(reference)

0 commit comments

Comments
 (0)