
Commit 1f7d4e3

Author: Daniel Yoo (committed)
Merge branch 'master' of github.com:yoodan93/aws-step-functions-data-science-sdk-python
2 parents: bfae3d9 + 9518733

18 files changed: +102 -91 lines

Diff for: README.rst (-1)

```diff
@@ -104,7 +104,6 @@ Supported Python Versions

 The AWS Step Functions Data Science SDK is tested on:

-* Python 2.7
 * Python 3.6

 Overview of SDK
```

Diff for: VERSION (+1 -1)

```diff
@@ -1 +1 @@
-1.1.1
+2.0.0-rc1
```

Diff for: doc/conf.py (+1 -1)

```diff
@@ -94,7 +94,7 @@ def __getattr__(cls, name):
 htmlhelp_basename = "%sdoc" % project

 intersphinx_mapping = {
-    "https://docs.python.org/2.7/": None,
+    "https://docs.python.org/3.6/": None,
     "https://boto3.readthedocs.io/en/latest": None,
     "https://sagemaker.readthedocs.io/en/stable": None,
 }
```

Diff for: requirements.txt (+2 -2)

```diff
@@ -1,3 +1,3 @@
-sagemaker>=1.71.0,<2.0.0
-boto3>=1.9.213
+sagemaker>=2.1.0
+boto3>=1.14.38
 pyyaml
```

Diff for: setup.py (+2 -3)

```diff
@@ -30,8 +30,8 @@ def read_version():

 # Declare minimal set for installation
 required_packages = [
-    "sagemaker>=1.71.0,<2.0.0",
-    "boto3>=1.9.213",
+    "sagemaker>=2.1.0",
+    "boto3>=1.14.38",
     "pyyaml"
 ]

@@ -56,7 +56,6 @@ def read_version():
         "Natural Language :: English",
         "License :: OSI Approved :: Apache Software License",
         "Programming Language :: Python",
-        "Programming Language :: Python :: 2.7",
         "Programming Language :: Python :: 3.6",
     ],
     install_requires=required_packages,
```

Diff for: src/stepfunctions/steps/__init__.py (+1 -1)

```diff
@@ -16,7 +16,7 @@

 from stepfunctions.steps.states import Pass, Succeed, Fail, Wait, Choice, Parallel, Map, Task, Chain, Retry, Catch
 from stepfunctions.steps.states import Graph, FrozenGraph
-from stepfunctions.steps.sagemaker import TrainingStep, TransformStep, ModelStep, EndpointConfigStep, EndpointStep, ProcessingStep
+from stepfunctions.steps.sagemaker import TrainingStep, TransformStep, ModelStep, EndpointConfigStep, EndpointStep, TuningStep, ProcessingStep
 from stepfunctions.steps.compute import LambdaStep, BatchSubmitJobStep, GlueStartJobRunStep, EcsRunTaskStep
 from stepfunctions.steps.service import DynamoDBGetItemStep, DynamoDBPutItemStep, DynamoDBUpdateItemStep, DynamoDBDeleteItemStep
 from stepfunctions.steps.service import SnsPublishStep, SqsSendMessageStep
```
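The practical effect of this one-line change is that `TuningStep` joins the package-level import surface alongside the other SageMaker steps:

```python
# Before this commit, TuningStep was only importable from the submodule:
from stepfunctions.steps.sagemaker import TuningStep

# After this commit, it is re-exported at the package level like its siblings:
from stepfunctions.steps import TuningStep
```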

Diff for: src/stepfunctions/steps/sagemaker.py (+11 -11)

```diff
@@ -36,12 +36,12 @@ def __init__(self, state_id, estimator, job_name, data=None, hyperparameters=Non
             data: Information about the training data. Please refer to the ``fit()`` method of the associated estimator, as this can take any of the following forms:

                 * (str) - The S3 location where training data is saved.
-                * (dict[str, str] or dict[str, sagemaker.session.s3_input]) - If using multiple
+                * (dict[str, str] or dict[str, sagemaker.inputs.TrainingInput]) - If using multiple
                     channels for training data, you can specify a dict mapping channel names to
-                    strings or :func:`~sagemaker.session.s3_input` objects.
-                * (sagemaker.session.s3_input) - Channel configuration for S3 data sources that can
+                    strings or :func:`~sagemaker.inputs.TrainingInput` objects.
+                * (sagemaker.inputs.TrainingInput) - Channel configuration for S3 data sources that can
                     provide additional information about the training dataset. See
-                    :func:`sagemaker.session.s3_input` for full details.
+                    :func:`sagemaker.inputs.TrainingInput` for full details.
                 * (sagemaker.amazon.amazon_estimator.RecordSet) - A collection of
                     Amazon :class:`Record` objects serialized and stored in S3.
                     For use with an estimator for an Amazon algorithm.
@@ -198,11 +198,11 @@ def __init__(self, state_id, model, model_name=None, instance_type=None, tags=No
             state_id (str): State name whose length **must be** less than or equal to 128 unicode characters. State names **must be** unique within the scope of the whole state machine.
             model (sagemaker.model.Model): The SageMaker model to use in the ModelStep. If :py:class:`TrainingStep` was used to train the model and saving the model is the next step in the workflow, the output of :py:func:`TrainingStep.get_expected_model()` can be passed here.
             model_name (str or Placeholder, optional): Specify a model name, this is required for creating the model. We recommend to use :py:class:`~stepfunctions.inputs.ExecutionInput` placeholder collection to pass the value dynamically in each execution.
-            instance_type (str, optional): The EC2 instance type to deploy this Model to. For example, 'ml.p2.xlarge'. This parameter is typically required when the estimator used is not an `Amazon built-in algorithm <https://docs.aws.amazon.com/sagemaker/latest/dg/algos.html>`_.
+            instance_type (str, optional): The EC2 instance type to deploy this Model to. For example, 'ml.p2.xlarge'.
             tags (list[dict], optional): `List to tags <https://docs.aws.amazon.com/sagemaker/latest/dg/API_Tag.html>`_ to associate with the resource.
         """
         if isinstance(model, FrameworkModel):
-            parameters = model_config(model=model, instance_type=instance_type, role=model.role, image=model.image)
+            parameters = model_config(model=model, instance_type=instance_type, role=model.role, image_uri=model.image_uri)
             if model_name:
                 parameters['ModelName'] = model_name
         elif isinstance(model, Model):
@@ -211,7 +211,7 @@ def __init__(self, state_id, model, model_name=None, instance_type=None, tags=No
                 'ModelName': model_name or model.name,
                 'PrimaryContainer': {
                     'Environment': {},
-                    'Image': model.image,
+                    'Image': model.image_uri,
                     'ModelDataUrl': model.model_data
                 }
             }
@@ -322,12 +322,12 @@ def __init__(self, state_id, tuner, job_name, data, wait_for_completion=True, ta
             data: Information about the training data. Please refer to the ``fit()`` method of the associated estimator in the tuner, as this can take any of the following forms:

                 * (str) - The S3 location where training data is saved.
-                * (dict[str, str] or dict[str, sagemaker.session.s3_input]) - If using multiple
+                * (dict[str, str] or dict[str, sagemaker.inputs.TrainingInput]) - If using multiple
                     channels for training data, you can specify a dict mapping channel names to
-                    strings or :func:`~sagemaker.session.s3_input` objects.
-                * (sagemaker.session.s3_input) - Channel configuration for S3 data sources that can
+                    strings or :func:`~sagemaker.inputs.TrainingInput` objects.
+                * (sagemaker.inputs.TrainingInput) - Channel configuration for S3 data sources that can
                     provide additional information about the training dataset. See
-                    :func:`sagemaker.session.s3_input` for full details.
+                    :func:`sagemaker.inputs.TrainingInput` for full details.
                 * (sagemaker.amazon.amazon_estimator.RecordSet) - A collection of
                     Amazon :class:`Record` objects serialized and stored in S3.
                     For use with an estimator for an Amazon algorithm.
```
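These docstring updates track the SageMaker Python SDK v2 rename of `sagemaker.session.s3_input` to `sagemaker.inputs.TrainingInput`. A minimal sketch of passing the v2 class as a training channel; the bucket, estimator, and job name are placeholders, not from this commit:

```python
from sagemaker.inputs import TrainingInput  # v2 replacement for sagemaker.session.s3_input
from stepfunctions.steps import TrainingStep

# Channel configuration for an S3 data source; URI and content type are placeholders.
train_channel = TrainingInput("s3://my-bucket/train/", content_type="text/csv")

# `my_estimator` is assumed to be a configured SageMaker estimator.
training_step = TrainingStep(
    "Train",
    estimator=my_estimator,         # hypothetical estimator
    data={"train": train_channel},  # the dict[str, TrainingInput] form from the docstring
    job_name="my-training-job",     # hypothetical job name
)
```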

Diff for: src/stepfunctions/template/pipeline/inference.py (+13 -13)

```diff
@@ -48,8 +48,8 @@ def __init__(self, preprocessor, estimator, inputs, s3_bucket, role, client=None
             inputs: Information about the training data. Please refer to the `fit()` method of the associated estimator, as this can take any of the following forms:

                 * (str) - The S3 location where training data is saved.
-                * (dict[str, str] or dict[str, `sagemaker.session.s3_input`]) - If using multiple channels for training data, you can specify a dict mapping channel names to strings or `sagemaker.session.s3_input` objects.
-                * (`sagemaker.session.s3_input`) - Channel configuration for S3 data sources that can provide additional information about the training dataset. See `sagemaker.session.s3_input` for full details.
+                * (dict[str, str] or dict[str, `sagemaker.inputs.TrainingInput`]) - If using multiple channels for training data, you can specify a dict mapping channel names to strings or `sagemaker.inputs.TrainingInput` objects.
+                * (`sagemaker.inputs.TrainingInput`) - Channel configuration for S3 data sources that can provide additional information about the training dataset. See `sagemaker.inputs.TrainingInput` for full details.
                 * (`sagemaker.amazon.amazon_estimator.RecordSet`) - A collection of Amazon `Record` objects serialized and stored in S3. For use with an estimator for an Amazon algorithm.
                 * (list[`sagemaker.amazon.amazon_estimator.RecordSet`]) - A list of `sagemaker.amazon.amazon_estimator.RecordSet` objects, where each instance is a different channel of training data.
             s3_bucket (str): S3 bucket under which the output artifacts from the training job will be stored. The parent path used is built using the format: ``s3://{s3_bucket}/{pipeline_name}/models/{job_name}/``. In this format, `pipeline_name` refers to the keyword argument provided for TrainingPipeline. If a `pipeline_name` argument was not provided, one is auto-generated by the pipeline as `training-pipeline-<timestamp>`. Also, in the format, `job_name` refers to the job name provided when calling the :meth:`TrainingPipeline.run()` method.
@@ -87,8 +87,8 @@ def build_workflow_definition(self):
         """
         default_name = self.pipeline_name

-        train_instance_type = self.preprocessor.train_instance_type
-        train_instance_count = self.preprocessor.train_instance_count
+        instance_type = self.preprocessor.instance_type
+        instance_count = self.preprocessor.instance_count

         # Preprocessor for feature transformation
         preprocessor_train_step = TrainingStep(
@@ -100,13 +100,13 @@ def build_workflow_definition(self):
         preprocessor_model = self.preprocessor.create_model()
         preprocessor_model_step = ModelStep(
             StepId.CreatePreprocessorModel.value,
-            instance_type=train_instance_type,
+            instance_type=instance_type,
             model=preprocessor_model,
             model_name=default_name
         )
         preprocessor_transform_step = TransformStep(
             StepId.TransformInput.value,
-            transformer=self.preprocessor.transformer(instance_count=train_instance_count, instance_type=train_instance_type, max_payload=20),
+            transformer=self.preprocessor.transformer(instance_count=instance_count, instance_type=instance_type, max_payload=20),
             job_name=default_name,
             model_name=default_name,
             data=self.inputs['train'],
@@ -115,8 +115,8 @@ def build_workflow_definition(self):
         )

         # Training
-        train_instance_type = self.estimator.train_instance_type
-        train_instance_count = self.estimator.train_instance_count
+        instance_type = self.estimator.instance_type
+        instance_count = self.estimator.instance_count

         training_step = TrainingStep(
             StepId.Train.value,
@@ -135,21 +135,21 @@ def build_workflow_definition(self):
         )
         pipeline_model_step = ModelStep(
             StepId.CreatePipelineModel.value,
-            instance_type=train_instance_type,
+            instance_type=instance_type,
             model=preprocessor_model,
             model_name=default_name
         )
-        pipeline_model_step.parameters = self.pipeline_model_config(train_instance_type, pipeline_model)
+        pipeline_model_step.parameters = self.pipeline_model_config(instance_type, pipeline_model)

-        deployable_model = Model(model_data='', image='')
+        deployable_model = Model(model_data='', image_uri='')

         # Deployment
         endpoint_config_step = EndpointConfigStep(
             StepId.ConfigureEndpoint.value,
             endpoint_config_name=default_name,
             model_name=default_name,
-            initial_instance_count=train_instance_count,
-            instance_type=train_instance_type
+            initial_instance_count=instance_count,
+            instance_type=instance_type
         )

         deploy_step = EndpointStep(
```
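The attribute reads here follow the SageMaker SDK v2 rename of the estimator constructor arguments `train_instance_type`/`train_instance_count` to `instance_type`/`instance_count`. A sketch of the rename on a generic estimator; the image URI and role ARN are placeholders:

```python
from sagemaker.estimator import Estimator

estimator = Estimator(
    image_uri="123456789012.dkr.ecr.us-east-1.amazonaws.com/my-image:latest",  # placeholder
    role="arn:aws:iam::123456789012:role/SageMakerRole",                       # placeholder
    instance_count=1,             # was train_instance_count in SDK v1
    instance_type="ml.m5.large",  # was train_instance_type in SDK v1
)

# The pipeline templates can now read the v2 attribute names directly:
assert estimator.instance_type == "ml.m5.large"
assert estimator.instance_count == 1
```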

Diff for: src/stepfunctions/template/pipeline/train.py (+7 -7)

```diff
@@ -43,8 +43,8 @@ def __init__(self, estimator, role, inputs, s3_bucket, client=None, **kwargs):
             inputs: Information about the training data. Please refer to the `fit()` method of the associated estimator, as this can take any of the following forms:

                 * (str) - The S3 location where training data is saved.
-                * (dict[str, str] or dict[str, `sagemaker.session.s3_input`]) - If using multiple channels for training data, you can specify a dict mapping channel names to strings or `sagemaker.session.s3_input` objects.
-                * (`sagemaker.session.s3_input`) - Channel configuration for S3 data sources that can provide additional information about the training dataset. See `sagemaker.session.s3_input` for full details.
+                * (dict[str, str] or dict[str, `sagemaker.inputs.TrainingInput`]) - If using multiple channels for training data, you can specify a dict mapping channel names to strings or `sagemaker.inputs.TrainingInput` objects.
+                * (`sagemaker.inputs.TrainingInput`) - Channel configuration for S3 data sources that can provide additional information about the training dataset. See `sagemaker.inputs.TrainingInput` for full details.
                 * (`sagemaker.amazon.amazon_estimator.RecordSet`) - A collection of Amazon `Record` objects serialized and stored in S3. For use with an estimator for an Amazon algorithm.
                 * (list[`sagemaker.amazon.amazon_estimator.RecordSet`]) - A list of `sagemaker.amazon.amazon_estimator.RecordSet` objects, where each instance is a different channel of training data.
             s3_bucket (str): S3 bucket under which the output artifacts from the training job will be stored. The parent path used is built using the format: ``s3://{s3_bucket}/{pipeline_name}/models/{job_name}/``. In this format, `pipeline_name` refers to the keyword argument provided for TrainingPipeline. If a `pipeline_name` argument was not provided, one is auto-generated by the pipeline as `training-pipeline-<timestamp>`. Also, in the format, `job_name` refers to the job name provided when calling the :meth:`TrainingPipeline.run()` method.
@@ -79,8 +79,8 @@ def build_workflow_definition(self):
         """
         default_name = self.pipeline_name

-        train_instance_type = self.estimator.train_instance_type
-        train_instance_count = self.estimator.train_instance_count
+        instance_type = self.estimator.instance_type
+        instance_count = self.estimator.instance_count

         training_step = TrainingStep(
             StepId.Train.value,
@@ -92,7 +92,7 @@ def build_workflow_definition(self):
         model = self.estimator.create_model()
         model_step = ModelStep(
             StepId.CreateModel.value,
-            instance_type=train_instance_type,
+            instance_type=instance_type,
             model=model,
             model_name=default_name
         )
@@ -101,8 +101,8 @@ def build_workflow_definition(self):
             StepId.ConfigureEndpoint.value,
             endpoint_config_name=default_name,
             model_name=default_name,
-            initial_instance_count=train_instance_count,
-            instance_type=train_instance_type
+            initial_instance_count=instance_count,
+            instance_type=instance_type
         )
         deploy_step = EndpointStep(
             StepId.Deploy.value,
```
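End to end, the template is consumed the same way as before; only the estimator construction changes. A hedged sketch of driving `TrainingPipeline` with a v2-style estimator; the role ARNs, bucket, and S3 input below are placeholders:

```python
from sagemaker.amazon.pca import PCA
from stepfunctions.template.pipeline import TrainingPipeline

# v2-style estimator: instance_* instead of train_instance_*.
pca_estimator = PCA(
    role="arn:aws:iam::123456789012:role/SageMakerRole",  # placeholder
    num_components=1,
    instance_count=1,
    instance_type="ml.m5.large",
)

pipeline = TrainingPipeline(
    estimator=pca_estimator,
    role="arn:aws:iam::123456789012:role/StepFunctionsWorkflowRole",  # placeholder
    inputs="s3://my-bucket/train/",  # placeholder; accepts the forms listed above
    s3_bucket="my-bucket",           # placeholder output bucket
)
pipeline.create()   # provisions the Step Functions state machine
pipeline.execute()  # starts a workflow execution
```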

Diff for: tests/integ/conftest.py (+2 -2)

```diff
@@ -53,8 +53,8 @@ def sagemaker_role_arn(aws_account_id):
 def pca_estimator_fixture(sagemaker_role_arn):
     estimator = pca.PCA(
         role=sagemaker_role_arn,
-        train_instance_count=1,
-        train_instance_type="ml.m5.large",
+        instance_count=1,
+        instance_type="ml.m5.large",
         num_components=48
     )
     return estimator
```

Diff for: tests/integ/test_inference_pipeline.py (+6 -2)

```diff
@@ -43,9 +43,11 @@ def sklearn_preprocessor(sagemaker_role_arn, sagemaker_session):
         'one_p_mnist',
         'sklearn_mnist_preprocessor.py')
     sklearn_preprocessor = SKLearn(
+        framework_version='0.20.0',
+        py_version='py3',
         entry_point=script_path,
         role=sagemaker_role_arn,
-        train_instance_type="ml.m5.large",
+        instance_type="ml.m5.large",
         sagemaker_session=sagemaker_session,
         hyperparameters={"epochs": 1},
     )
@@ -58,9 +60,11 @@ def sklearn_estimator(sagemaker_role_arn, sagemaker_session):
         'one_p_mnist',
         'sklearn_mnist_estimator.py')
     sklearn_estimator = SKLearn(
+        framework_version='0.20.0',
+        py_version='py3',
         entry_point=script_path,
         role=sagemaker_role_arn,
-        train_instance_type="ml.m5.large",
+        instance_type="ml.m5.large",
         sagemaker_session=sagemaker_session,
         hyperparameters={"epochs": 1},
         input_mode='File'
```
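Besides the `instance_type` rename, these fixtures now pin `framework_version` (and, for clarity, `py_version`): SageMaker SDK v2 removed the implicit framework-version defaults, so framework estimators must state a version explicitly. A sketch mirroring the fixture; the script path and role ARN are placeholders:

```python
from sagemaker.sklearn.estimator import SKLearn

sklearn_estimator = SKLearn(
    framework_version="0.20.0",  # required explicitly under SDK v2
    py_version="py3",
    entry_point="sklearn_mnist_estimator.py",             # placeholder script
    role="arn:aws:iam::123456789012:role/SageMakerRole",  # placeholder role
    instance_type="ml.m5.large",
    hyperparameters={"epochs": 1},
)
```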

Diff for: tests/integ/test_sagemaker_steps.py (+2 -2)

```diff
@@ -254,8 +254,8 @@ def test_tuning_step(sfn_client, record_set_for_hyperparameter_tuning, sagemaker

     kmeans = KMeans(
         role=sagemaker_role_arn,
-        train_instance_count=1,
-        train_instance_type=INSTANCE_TYPE,
+        instance_count=1,
+        instance_type=INSTANCE_TYPE,
         k=10
     )
```
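This fixture feeds the tuning-step test. A sketch of the surrounding wiring with an illustrative tuner configuration; the objective metric, range, job limits, role ARN, and `record_set` are assumptions, not taken from the commit:

```python
from sagemaker.amazon.kmeans import KMeans
from sagemaker.tuner import HyperparameterTuner, IntegerParameter
from stepfunctions.steps import TuningStep

kmeans = KMeans(
    role="arn:aws:iam::123456789012:role/SageMakerRole",  # placeholder
    instance_count=1,
    instance_type="ml.m5.large",
    k=10,
)

# Illustrative tuner: metric name and range are assumptions for this sketch.
tuner = HyperparameterTuner(
    estimator=kmeans,
    objective_metric_name="test:msd",
    objective_type="Minimize",
    hyperparameter_ranges={"extra_center_factor": IntegerParameter(2, 10)},
    max_jobs=2,
    max_parallel_jobs=2,
)

tuning_step = TuningStep(
    "Tuning",
    tuner=tuner,
    job_name="kmeans-tuning",  # placeholder job name
    data=record_set,           # placeholder RecordSet of training data
)
```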

Diff for: tests/integ/test_state_machine_definition.py (+2 -2)

```diff
@@ -16,7 +16,7 @@
 import json

 from sagemaker.utils import unique_name_from_base
-from sagemaker.amazon.amazon_estimator import get_image_uri
+from sagemaker.image_uris import retrieve
 from stepfunctions import steps
 from stepfunctions.workflow import Workflow
 from tests.integ.utils import state_machine_delete_wait
@@ -25,7 +25,7 @@
 def training_job_parameters(sagemaker_session, sagemaker_role_arn, record_set_fixture):
     parameters = {
         "AlgorithmSpecification": {
-            "TrainingImage": get_image_uri(sagemaker_session.boto_session.region_name, 'pca'),
+            "TrainingImage": retrieve(region=sagemaker_session.boto_session.region_name, framework='pca'),
             "TrainingInputMode": "File"
         },
         "OutputDataConfig": {
```

Diff for: tests/integ/test_training_pipeline_estimators.py (+8 -4)

```diff
@@ -25,7 +25,7 @@

 # import Sagemaker
 from sagemaker.amazon.pca import PCA
-from sagemaker.amazon.amazon_estimator import get_image_uri
+from sagemaker.image_uris import retrieve

 # import StepFunctions
 from stepfunctions.template.pipeline import TrainingPipeline
@@ -50,8 +50,8 @@ def pca_estimator(sagemaker_role_arn):
     pca_estimator = PCA(
         role=sagemaker_role_arn,
         num_components=1,
-        train_instance_count=1,
-        train_instance_type='ml.m5.large',
+        instance_count=1,
+        instance_type='ml.m5.large',
     )

     pca_estimator.feature_dim=500
@@ -105,7 +105,7 @@ def test_pca_estimator(sfn_client, sagemaker_session, sagemaker_role_arn, sfn_ro
     job_name = workflow_execution_info['name']
     s3_manifest_uri = inputs.s3_data
     status = 'SUCCEEDED'
-    estimator_image_uri = get_image_uri(sagemaker_session.boto_region_name, 'pca')
+    estimator_image_uri = retrieve(region=sagemaker_session.boto_region_name, framework='pca')

     execution_info = sfn_client.describe_execution(executionArn=execution_arn)
     execution_info['input'] = json.loads(execution_info['input'])
@@ -115,10 +115,14 @@ def test_pca_estimator(sfn_client, sagemaker_session, sagemaker_role_arn, sfn_ro
     s3_output_path = 's3://{bucket_name}/{workflow_name}/models'.format(bucket_name=bucket_name, workflow_name=unique_name)
     expected_execution_info = {'executionArn': execution_arn,
                                'stateMachineArn': state_machine_arn,
+                               'inputDetails': {'included': True},
                                'name': job_name,
+                               'outputDetails': {'included': True},
                                'status': status,
                                'startDate': execution_info['startDate'],
                                'stopDate': execution_info['stopDate'],
+                               'inputDetails': {'included': True},
+                               'outputDetails': {'included': True},
                                'input': {'Training': {'AlgorithmSpecification': {'TrainingImage': estimator_image_uri,
                                                                                  'TrainingInputMode': 'File'},
                                                       'OutputDataConfig': {'S3OutputPath': s3_output_path},
```
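Two notes on this last hunk. The new keys track newer Step Functions `DescribeExecution` responses, which report whether the input and output payloads are included via `inputDetails`/`outputDetails`; and since a Python dict literal keeps only the last value for a repeated key, the duplicated entries the merge introduced are redundant but harmless. A hedged boto3 sketch of the response shape; the region and execution ARN are placeholders:

```python
import boto3

sfn_client = boto3.client("stepfunctions", region_name="us-east-1")  # placeholder region

execution_info = sfn_client.describe_execution(
    executionArn="arn:aws:states:us-east-1:123456789012:execution:wf:run"  # placeholder ARN
)

# Newer API versions include these payload-detail fields in the response:
print(execution_info["inputDetails"])   # e.g. {'included': True}
print(execution_info["outputDetails"])  # e.g. {'included': True}
```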
