Skip to content

Cherrypick: Refactor e2e deletion and unmark adopted Endpoint Test (#216) #219

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions test/e2e/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
import random

from acktest.k8s import resource as k8s
from common import config as cfg


SERVICE_NAME = "sagemaker"
CRD_GROUP = "sagemaker.services.k8s.aws"
Expand Down Expand Up @@ -372,3 +374,15 @@ def get_sagemaker_pipeline(pipeline_name: str):
f"SageMaker could not find a pipeline with the name {pipeline_name}. Error {error}"
)
return None


def delete_custom_resource(
    reference,
    wait_period=cfg.JOB_DELETE_WAIT_PERIODS,
    wait_length=cfg.JOB_DELETE_WAIT_LENGTH,
):
    """Delete the custom resource behind ``reference`` if it still exists.

    When ``k8s.get_resource_exists`` reports that the resource is already
    gone, no delete call is made and ``True`` is returned. Otherwise the
    request is forwarded to ``k8s.delete_custom_resource`` together with
    ``wait_period`` and ``wait_length``, and the second element of its
    result is returned to the caller.
    """
    if not k8s.get_resource_exists(reference):
        # Nothing left to remove, so treat it as already deleted.
        return True

    outcome = k8s.delete_custom_resource(reference, wait_period, wait_length)
    _, was_deleted = outcome
    return was_deleted
2 changes: 1 addition & 1 deletion test/e2e/common/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
DELETE_WAIT_PERIOD = 4
DELETE_WAIT_LENGTH = 30

JOB_DELETE_WAIT_PERIODS = 12
JOB_DELETE_WAIT_PERIODS = 18
JOB_DELETE_WAIT_LENGTH = 30

TAG_DELAY_SLEEP = 20
12 changes: 7 additions & 5 deletions test/e2e/common/fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import logging
from e2e import (
create_sagemaker_resource,
delete_custom_resource,
wait_sagemaker_endpoint_status,
)

Expand Down Expand Up @@ -91,8 +92,9 @@ def xgboost_churn_endpoint(sagemaker_client):
yield endpoint_spec

for cr in (model_reference, endpoint_config_reference, endpoint_reference):
_, deleted = k8s.delete_custom_resource(cr, cfg.DELETE_WAIT_PERIOD, cfg.DELETE_WAIT_LENGTH)
assert deleted
assert delete_custom_resource(
cr, cfg.DELETE_WAIT_PERIOD, cfg.DELETE_WAIT_LENGTH
)


@pytest.fixture(scope="module")
Expand All @@ -118,6 +120,6 @@ def xgboost_churn_data_quality_job_definition(xgboost_churn_endpoint):

yield (job_definition_reference, resource)

if k8s.get_resource_exists(job_definition_reference):
_, deleted = k8s.delete_custom_resource(job_definition_reference, 3, 10)
assert deleted
assert delete_custom_resource(
job_definition_reference, cfg.DELETE_WAIT_PERIOD, cfg.DELETE_WAIT_LENGTH
)
15 changes: 8 additions & 7 deletions test/e2e/tests/test_adopt_endpoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
CRD_VERSION,
create_adopted_resource,
wait_sagemaker_endpoint_status,
delete_custom_resource,
assert_endpoint_status_in_sync,
sagemaker_client,
get_sagemaker_endpoint,
Expand Down Expand Up @@ -169,13 +170,12 @@ def adopted_endpoint(sdk_endpoint):
yield (adopt_model_reference, adopt_config_reference, adopt_endpoint_reference)

for cr in (adopt_model_reference, adopt_config_reference, adopt_endpoint_reference):
if k8s.get_resource_exists(cr):
_, deleted = k8s.delete_custom_resource(cr, 3, 10)
assert deleted
assert delete_custom_resource(
cr, cfg.DELETE_WAIT_PERIOD, cfg.DELETE_WAIT_LENGTH
)


@service_marker
@pytest.mark.canary
class TestAdoptedEndpoint:
def test_smoke(self, sdk_endpoint, adopted_endpoint):
(
Expand Down Expand Up @@ -264,12 +264,13 @@ def test_smoke(self, sdk_endpoint, adopted_endpoint):
)

assert_endpoint_status_in_sync(
endpoint_name, endpoint_reference, cfg.ENDPOINT_STATUS_INSERVICE,
endpoint_name,
endpoint_reference,
cfg.ENDPOINT_STATUS_INSERVICE,
)
assert k8s.wait_on_condition(endpoint_reference, "ACK.ResourceSynced", "True")

for cr in (model_reference, config_reference, endpoint_reference):
_, deleted = k8s.delete_custom_resource(
assert delete_custom_resource(
cr, cfg.DELETE_WAIT_PERIOD, cfg.DELETE_WAIT_LENGTH
)
assert deleted
31 changes: 20 additions & 11 deletions test/e2e/tests/test_adopt_model_package.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
create_adopted_resource,
wait_sagemaker_model_package_status,
assert_model_package_status_in_sync,
delete_custom_resource,
get_sagemaker_model_package_group,
get_sagemaker_model_package,
sagemaker_client,
Expand Down Expand Up @@ -62,8 +63,12 @@ def sdk_make_model_package(model_package_group_name):
"ModelDataUrl": f"s3://{data_bucket}/sagemaker/model/xgboost-mnist-model.tar.gz",
}
],
"SupportedContentTypes": ["text/csv",],
"SupportedResponseMIMETypes": ["text/csv",],
"SupportedContentTypes": [
"text/csv",
],
"SupportedResponseMIMETypes": [
"text/csv",
],
},
}

Expand Down Expand Up @@ -126,7 +131,9 @@ def adopted_model_package(sdk_model_package):
adopt_model_package_group_reference,
_,
adopt_model_package_group_resource,
) = create_adopted_resource(replacements=replacements,)
) = create_adopted_resource(
replacements=replacements,
)
assert adopt_model_package_group_resource is not None

# adopt model package
Expand All @@ -144,16 +151,17 @@ def adopted_model_package(sdk_model_package):
_,
adopt_model_package_resource,
) = create_adopted_resource(
replacements=replacements, spec_file="adopted_resource_base_arn",
replacements=replacements,
spec_file="adopted_resource_base_arn",
)
assert adopt_model_package_resource is not None

yield (adopt_model_package_group_reference, adopt_model_package_reference)

for cr in (adopt_model_package_group_reference, adopt_model_package_reference):
if k8s.get_resource_exists(cr):
_, deleted = k8s.delete_custom_resource(cr, 3, 10)
assert deleted
assert delete_custom_resource(
cr, cfg.DELETE_WAIT_PERIOD, cfg.DELETE_WAIT_LENGTH
)


@service_marker
Expand Down Expand Up @@ -236,14 +244,15 @@ def test_smoke(self, sdk_model_package, adopted_model_package):
) == model_package_response.get("ModelPackageArn", None)

assert_model_package_status_in_sync(
model_package_arn, model_package_reference, cfg.JOB_STATUS_COMPLETED,
model_package_arn,
model_package_reference,
cfg.JOB_STATUS_COMPLETED,
)
assert k8s.wait_on_condition(
model_package_reference, "ACK.ResourceSynced", "True"
)

for cr in (model_package_reference, model_package_group_reference):
_, deleted = k8s.delete_custom_resource(
cr, cfg.JOB_DELETE_WAIT_PERIODS, cfg.JOB_DELETE_WAIT_LENGTH
assert delete_custom_resource(
cr, cfg.DELETE_WAIT_PERIOD, cfg.DELETE_WAIT_LENGTH
)
assert deleted
12 changes: 7 additions & 5 deletions test/e2e/tests/test_cross_region.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from e2e import (
service_marker,
create_sagemaker_resource,
delete_custom_resource,
get_sagemaker_model,
sagemaker_client,
)
Expand Down Expand Up @@ -54,9 +55,9 @@ def cross_region_model():
yield (reference, resource)

# Delete the k8s resource if not already deleted by tests
if k8s.get_resource_exists(reference):
_, deleted = k8s.delete_custom_resource(reference, 3, 10)
assert deleted
assert delete_custom_resource(
reference, cfg.DELETE_WAIT_PERIOD, cfg.DELETE_WAIT_LENGTH
)


def get_cross_region():
Expand All @@ -79,7 +80,8 @@ def test_create_cross_region_model(self, cross_region_model):
assert k8s.get_resource_arn(resource) == cross_region_model_arn

# Delete the k8s resource.
_, deleted = k8s.delete_custom_resource(reference, 3, 10)
assert deleted
assert delete_custom_resource(
reference, cfg.DELETE_WAIT_PERIOD, cfg.DELETE_WAIT_LENGTH
)

assert get_sagemaker_model(model_name, sm_client) is None
6 changes: 4 additions & 2 deletions test/e2e/tests/test_data_quality_job_definition.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
)
from e2e.common import config as cfg
from acktest.k8s import resource as k8s
from e2e import delete_custom_resource

# Access variable so it is loaded as a fixture
_accessed = xgboost_churn_data_quality_job_definition, xgboost_churn_endpoint
Expand Down Expand Up @@ -63,8 +64,9 @@ def test_smoke(self, sagemaker_client, xgboost_churn_data_quality_job_definition
resource_tags = resource["spec"].get("tags", None)
assert_tags_in_sync(job_definition_arn, resource_tags)
# Delete the k8s resource.
_, deleted = k8s.delete_custom_resource(reference, 3, 10)
assert deleted
assert delete_custom_resource(
reference, cfg.DELETE_WAIT_PERIOD, cfg.DELETE_WAIT_LENGTH
)
assert (
describe_sagemaker_data_quality_job_definition(
sagemaker_client, job_definition_name
Expand Down
41 changes: 27 additions & 14 deletions test/e2e/tests/test_endpoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from e2e import (
service_marker,
create_sagemaker_resource,
delete_custom_resource,
assert_endpoint_status_in_sync,
assert_tags_in_sync,
get_sagemaker_endpoint,
Expand Down Expand Up @@ -65,7 +66,9 @@ def single_container_model(name_suffix):

yield (model_reference, model_resource)

_, deleted = k8s.delete_custom_resource(model_reference, 3, 10)
_, deleted = k8s.delete_custom_resource(
model_reference, cfg.DELETE_WAIT_PERIOD, cfg.DELETE_WAIT_LENGTH
)
assert deleted


Expand Down Expand Up @@ -94,7 +97,9 @@ def multi_variant_config(name_suffix, single_container_model):

yield (config_reference, config_resource)

_, deleted = k8s.delete_custom_resource(config_reference, 3, 10)
_, deleted = k8s.delete_custom_resource(
config_reference, cfg.DELETE_WAIT_PERIOD, cfg.DELETE_WAIT_LENGTH
)
assert deleted


Expand Down Expand Up @@ -123,7 +128,9 @@ def single_variant_config(name_suffix, single_container_model):

yield (config_reference, config_resource)

_, deleted = k8s.delete_custom_resource(config_reference, 3, 10)
_, deleted = k8s.delete_custom_resource(
config_reference, cfg.DELETE_WAIT_PERIOD, cfg.DELETE_WAIT_LENGTH
)
assert deleted


Expand All @@ -149,10 +156,7 @@ def xgboost_endpoint(name_suffix, single_variant_config):
yield (reference, resource, spec)

# Delete the k8s resource if not already deleted by tests
if k8s.get_resource_exists(reference):
# longer wait in case endpoint is in creating/updating status
_, deleted = k8s.delete_custom_resource(reference, 40, cfg.DELETE_WAIT_LENGTH)
assert deleted
assert delete_custom_resource(reference, 40, cfg.DELETE_WAIT_LENGTH)


@pytest.fixture(scope="module")
Expand Down Expand Up @@ -207,7 +211,9 @@ def faulty_config(name_suffix, single_container_model):
yield (config_reference, config_resource)

for cr in (model_reference, config_reference):
_, deleted = k8s.delete_custom_resource(cr, 3, 10)
_, deleted = k8s.delete_custom_resource(
cr, cfg.DELETE_WAIT_PERIOD, cfg.DELETE_WAIT_LENGTH
)
assert deleted


Expand Down Expand Up @@ -255,7 +261,9 @@ def update_endpoint_failed_test(

# endpoint transitions Updating -> InService state
assert_endpoint_status_in_sync(
endpoint_reference.name, endpoint_reference, cfg.ENDPOINT_STATUS_UPDATING,
endpoint_reference.name,
endpoint_reference,
cfg.ENDPOINT_STATUS_UPDATING,
)
assert k8s.wait_on_condition(endpoint_reference, "ACK.ResourceSynced", "False")
endpoint_resource = k8s.get_resource(endpoint_reference)
Expand All @@ -264,7 +272,9 @@ def update_endpoint_failed_test(
assert annotations[LAST_ENDPOINTCONFIG_UPDATE_ANNOTATION] == faulty_config_name

assert_endpoint_status_in_sync(
endpoint_reference.name, endpoint_reference, cfg.ENDPOINT_STATUS_INSERVICE,
endpoint_reference.name,
endpoint_reference,
cfg.ENDPOINT_STATUS_INSERVICE,
)

assert k8s.wait_on_condition(endpoint_reference, "ACK.ResourceSynced", "False")
Expand Down Expand Up @@ -302,7 +312,9 @@ def update_endpoint_successful_test(self, multi_variant_config, xgboost_endpoint

# endpoint transitions Updating -> InService state
assert_endpoint_status_in_sync(
endpoint_reference.name, endpoint_reference, cfg.ENDPOINT_STATUS_UPDATING,
endpoint_reference.name,
endpoint_reference,
cfg.ENDPOINT_STATUS_UPDATING,
)

assert k8s.wait_on_condition(endpoint_reference, "ACK.ResourceSynced", "False")
Expand All @@ -313,7 +325,9 @@ def update_endpoint_successful_test(self, multi_variant_config, xgboost_endpoint
assert annotations[LAST_ENDPOINTCONFIG_UPDATE_ANNOTATION] == new_config_name

assert_endpoint_status_in_sync(
endpoint_reference.name, endpoint_reference, cfg.ENDPOINT_STATUS_INSERVICE,
endpoint_reference.name,
endpoint_reference,
cfg.ENDPOINT_STATUS_INSERVICE,
)
assert k8s.wait_on_condition(endpoint_reference, "ACK.ResourceSynced", "True")
assert k8s.get_resource_condition(endpoint_reference, "ACK.Terminal") is None
Expand All @@ -336,10 +350,9 @@ def delete_endpoint_test(self, xgboost_endpoint):
(reference, resource, _) = xgboost_endpoint
endpoint_name = resource["spec"].get("endpointName", None)

_, deleted = k8s.delete_custom_resource(
assert delete_custom_resource(
reference, cfg.DELETE_WAIT_PERIOD, cfg.DELETE_WAIT_LENGTH
)
assert deleted

assert get_sagemaker_endpoint(endpoint_name) is None

Expand Down
16 changes: 10 additions & 6 deletions test/e2e/tests/test_endpoint_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from e2e import (
service_marker,
create_sagemaker_resource,
delete_custom_resource,
assert_tags_in_sync,
get_sagemaker_endpoint_config,
)
Expand Down Expand Up @@ -63,11 +64,13 @@ def single_variant_config():

yield (config_reference, config_resource)

k8s.delete_custom_resource(model_reference, 3, 10)
k8s.delete_custom_resource(
model_reference, cfg.DELETE_WAIT_PERIOD, cfg.DELETE_WAIT_LENGTH
)
# Delete the k8s resource if not already deleted by tests
if k8s.get_resource_exists(config_reference):
_, deleted = k8s.delete_custom_resource(config_reference, 3, 10)
assert deleted
assert delete_custom_resource(
config_reference, cfg.DELETE_WAIT_PERIOD, cfg.DELETE_WAIT_LENGTH
)


@service_marker
Expand All @@ -87,7 +90,8 @@ def test_create_endpoint_config(self, single_variant_config):
resource_tags = resource["spec"].get("tags", None)
assert_tags_in_sync(endpoint_arn, resource_tags)
# Delete the k8s resource.
_, deleted = k8s.delete_custom_resource(reference, 3, 10)
assert deleted
assert delete_custom_resource(
reference, cfg.DELETE_WAIT_PERIOD, cfg.DELETE_WAIT_LENGTH
)

assert get_sagemaker_endpoint_config(config_name) is None
Loading