
Commit 1f7d4e3

Author: Daniel Yoo (committed)
Merge branch 'master' of github.com:yoodan93/aws-step-functions-data-science-sdk-python
2 parents: bfae3d9 + 9518733

18 files changed: +102 -91 lines

Diff for: README.rst (-1)

```diff
@@ -104,7 +104,6 @@ Supported Python Versions

 The AWS Step Functions Data Science SDK is tested on:

-* Python 2.7
 * Python 3.6

 Overview of SDK
```

Diff for: VERSION (+1 -1)

```diff
@@ -1 +1 @@
-1.1.1
+2.0.0-rc1
```

Diff for: doc/conf.py (+1 -1)

```diff
@@ -94,7 +94,7 @@ def __getattr__(cls, name):
 htmlhelp_basename = "%sdoc" % project

 intersphinx_mapping = {
-    "https://docs.python.org/2.7/": None,
+    "https://docs.python.org/3.6/": None,
     "https://boto3.readthedocs.io/en/latest": None,
     "https://sagemaker.readthedocs.io/en/stable": None,
 }
```

Diff for: requirements.txt (+2 -2)

```diff
@@ -1,3 +1,3 @@
-sagemaker>=1.71.0,<2.0.0
-boto3>=1.9.213
+sagemaker>=2.1.0
+boto3>=1.14.38
 pyyaml
```

Diff for: setup.py (+2 -3)

```diff
@@ -30,8 +30,8 @@ def read_version():

 # Declare minimal set for installation
 required_packages = [
-    "sagemaker>=1.71.0,<2.0.0",
-    "boto3>=1.9.213",
+    "sagemaker>=2.1.0",
+    "boto3>=1.14.38",
     "pyyaml"
 ]

@@ -56,7 +56,6 @@ def read_version():
         "Natural Language :: English",
         "License :: OSI Approved :: Apache Software License",
         "Programming Language :: Python",
-        "Programming Language :: Python :: 2.7",
         "Programming Language :: Python :: 3.6",
     ],
     install_requires=required_packages,
```

Diff for: src/stepfunctions/steps/__init__.py (+1 -1)

```diff
@@ -16,7 +16,7 @@

 from stepfunctions.steps.states import Pass, Succeed, Fail, Wait, Choice, Parallel, Map, Task, Chain, Retry, Catch
 from stepfunctions.steps.states import Graph, FrozenGraph
-from stepfunctions.steps.sagemaker import TrainingStep, TransformStep, ModelStep, EndpointConfigStep, EndpointStep, ProcessingStep
+from stepfunctions.steps.sagemaker import TrainingStep, TransformStep, ModelStep, EndpointConfigStep, EndpointStep, TuningStep, ProcessingStep
 from stepfunctions.steps.compute import LambdaStep, BatchSubmitJobStep, GlueStartJobRunStep, EcsRunTaskStep
 from stepfunctions.steps.service import DynamoDBGetItemStep, DynamoDBPutItemStep, DynamoDBUpdateItemStep, DynamoDBDeleteItemStep
 from stepfunctions.steps.service import SnsPublishStep, SqsSendMessageStep
```
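The practical effect of this one-line change is that `TuningStep` joins the package-level import surface alongside the other SageMaker steps:

```python
# Before this commit, TuningStep was only importable from the submodule:
from stepfunctions.steps.sagemaker import TuningStep

# After this commit, it is re-exported at the package level like its siblings:
from stepfunctions.steps import TuningStep
```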

Diff for: src/stepfunctions/steps/sagemaker.py (+11 -11)

```diff
@@ -36,12 +36,12 @@ def __init__(self, state_id, estimator, job_name, data=None, hyperparameters=Non
             data: Information about the training data. Please refer to the ``fit()`` method of the associated estimator, as this can take any of the following forms:

                 * (str) - The S3 location where training data is saved.
-                * (dict[str, str] or dict[str, sagemaker.session.s3_input]) - If using multiple
+                * (dict[str, str] or dict[str, sagemaker.inputs.TrainingInput]) - If using multiple
                     channels for training data, you can specify a dict mapping channel names to
-                    strings or :func:`~sagemaker.session.s3_input` objects.
-                * (sagemaker.session.s3_input) - Channel configuration for S3 data sources that can
+                    strings or :func:`~sagemaker.inputs.TrainingInput` objects.
+                * (sagemaker.inputs.TrainingInput) - Channel configuration for S3 data sources that can
                     provide additional information about the training dataset. See
-                    :func:`sagemaker.session.s3_input` for full details.
+                    :func:`sagemaker.inputs.TrainingInput` for full details.
                 * (sagemaker.amazon.amazon_estimator.RecordSet) - A collection of
                     Amazon :class:`Record` objects serialized and stored in S3.
                     For use with an estimator for an Amazon algorithm.
@@ -198,11 +198,11 @@ def __init__(self, state_id, model, model_name=None, instance_type=None, tags=No
             state_id (str): State name whose length **must be** less than or equal to 128 unicode characters. State names **must be** unique within the scope of the whole state machine.
             model (sagemaker.model.Model): The SageMaker model to use in the ModelStep. If :py:class:`TrainingStep` was used to train the model and saving the model is the next step in the workflow, the output of :py:func:`TrainingStep.get_expected_model()` can be passed here.
             model_name (str or Placeholder, optional): Specify a model name, this is required for creating the model. We recommend to use :py:class:`~stepfunctions.inputs.ExecutionInput` placeholder collection to pass the value dynamically in each execution.
-            instance_type (str, optional): The EC2 instance type to deploy this Model to. For example, 'ml.p2.xlarge'. This parameter is typically required when the estimator used is not an `Amazon built-in algorithm <https://docs.aws.amazon.com/sagemaker/latest/dg/algos.html>`_.
+            instance_type (str, optional): The EC2 instance type to deploy this Model to. For example, 'ml.p2.xlarge'.
             tags (list[dict], optional): `List to tags <https://docs.aws.amazon.com/sagemaker/latest/dg/API_Tag.html>`_ to associate with the resource.
         """
         if isinstance(model, FrameworkModel):
-            parameters = model_config(model=model, instance_type=instance_type, role=model.role, image=model.image)
+            parameters = model_config(model=model, instance_type=instance_type, role=model.role, image_uri=model.image_uri)
             if model_name:
                 parameters['ModelName'] = model_name
         elif isinstance(model, Model):
@@ -211,7 +211,7 @@ def __init__(self, state_id, model, model_name=None, instance_type=None, tags=No
                 'ModelName': model_name or model.name,
                 'PrimaryContainer': {
                     'Environment': {},
-                    'Image': model.image,
+                    'Image': model.image_uri,
                     'ModelDataUrl': model.model_data
                 }
             }
@@ -322,12 +322,12 @@ def __init__(self, state_id, tuner, job_name, data, wait_for_completion=True, ta
             data: Information about the training data. Please refer to the ``fit()`` method of the associated estimator in the tuner, as this can take any of the following forms:

                 * (str) - The S3 location where training data is saved.
-                * (dict[str, str] or dict[str, sagemaker.session.s3_input]) - If using multiple
+                * (dict[str, str] or dict[str, sagemaker.inputs.TrainingInput]) - If using multiple
                     channels for training data, you can specify a dict mapping channel names to
-                    strings or :func:`~sagemaker.session.s3_input` objects.
-                * (sagemaker.session.s3_input) - Channel configuration for S3 data sources that can
+                    strings or :func:`~sagemaker.inputs.TrainingInput` objects.
+                * (sagemaker.inputs.TrainingInput) - Channel configuration for S3 data sources that can
                     provide additional information about the training dataset. See
-                    :func:`sagemaker.session.s3_input` for full details.
+                    :func:`sagemaker.inputs.TrainingInput` for full details.
                 * (sagemaker.amazon.amazon_estimator.RecordSet) - A collection of
                     Amazon :class:`Record` objects serialized and stored in S3.
                     For use with an estimator for an Amazon algorithm.
```
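These docstring updates track the SageMaker Python SDK v2 rename of `sagemaker.session.s3_input` to `sagemaker.inputs.TrainingInput`. A minimal sketch of passing the v2 class as a training channel; the bucket, estimator, and job name are placeholders, not from this commit:

```python
from sagemaker.inputs import TrainingInput  # v2 replacement for sagemaker.session.s3_input
from stepfunctions.steps import TrainingStep

# Channel configuration for an S3 data source; URI and content type are placeholders.
train_channel = TrainingInput("s3://my-bucket/train/", content_type="text/csv")

# `my_estimator` is assumed to be a configured SageMaker estimator.
training_step = TrainingStep(
    "Train",
    estimator=my_estimator,         # hypothetical estimator
    data={"train": train_channel},  # the dict[str, TrainingInput] form from the docstring
    job_name="my-training-job",     # hypothetical job name
)
```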

Diff for: src/stepfunctions/template/pipeline/inference.py (+13 -13)

```diff
@@ -48,8 +48,8 @@ def __init__(self, preprocessor, estimator, inputs, s3_bucket, role, client=None
             inputs: Information about the training data. Please refer to the `fit()` method of the associated estimator, as this can take any of the following forms:

                 * (str) - The S3 location where training data is saved.
-                * (dict[str, str] or dict[str, `sagemaker.session.s3_input`]) - If using multiple channels for training data, you can specify a dict mapping channel names to strings or `sagemaker.session.s3_input` objects.
-                * (`sagemaker.session.s3_input`) - Channel configuration for S3 data sources that can provide additional information about the training dataset. See `sagemaker.session.s3_input` for full details.
+                * (dict[str, str] or dict[str, `sagemaker.inputs.TrainingInput`]) - If using multiple channels for training data, you can specify a dict mapping channel names to strings or `sagemaker.inputs.TrainingInput` objects.
+                * (`sagemaker.inputs.TrainingInput`) - Channel configuration for S3 data sources that can provide additional information about the training dataset. See `sagemaker.inputs.TrainingInput` for full details.
                 * (`sagemaker.amazon.amazon_estimator.RecordSet`) - A collection of Amazon `Record` objects serialized and stored in S3. For use with an estimator for an Amazon algorithm.
                 * (list[`sagemaker.amazon.amazon_estimator.RecordSet`]) - A list of `sagemaker.amazon.amazon_estimator.RecordSet` objects, where each instance is a different channel of training data.
             s3_bucket (str): S3 bucket under which the output artifacts from the training job will be stored. The parent path used is built using the format: ``s3://{s3_bucket}/{pipeline_name}/models/{job_name}/``. In this format, `pipeline_name` refers to the keyword argument provided for TrainingPipeline. If a `pipeline_name` argument was not provided, one is auto-generated by the pipeline as `training-pipeline-<timestamp>`. Also, in the format, `job_name` refers to the job name provided when calling the :meth:`TrainingPipeline.run()` method.
@@ -87,8 +87,8 @@ def build_workflow_definition(self):
         """
         default_name = self.pipeline_name

-        train_instance_type = self.preprocessor.train_instance_type
-        train_instance_count = self.preprocessor.train_instance_count
+        instance_type = self.preprocessor.instance_type
+        instance_count = self.preprocessor.instance_count

         # Preprocessor for feature transformation
         preprocessor_train_step = TrainingStep(
@@ -100,13 +100,13 @@ def build_workflow_definition(self):
         preprocessor_model = self.preprocessor.create_model()
         preprocessor_model_step = ModelStep(
             StepId.CreatePreprocessorModel.value,
-            instance_type=train_instance_type,
+            instance_type=instance_type,
             model=preprocessor_model,
             model_name=default_name
         )
         preprocessor_transform_step = TransformStep(
             StepId.TransformInput.value,
-            transformer=self.preprocessor.transformer(instance_count=train_instance_count, instance_type=train_instance_type, max_payload=20),
+            transformer=self.preprocessor.transformer(instance_count=instance_count, instance_type=instance_type, max_payload=20),
             job_name=default_name,
             model_name=default_name,
             data=self.inputs['train'],
@@ -115,8 +115,8 @@ def build_workflow_definition(self):
         )

         # Training
-        train_instance_type = self.estimator.train_instance_type
-        train_instance_count = self.estimator.train_instance_count
+        instance_type = self.estimator.instance_type
+        instance_count = self.estimator.instance_count

         training_step = TrainingStep(
             StepId.Train.value,
@@ -135,21 +135,21 @@ def build_workflow_definition(self):
         )
         pipeline_model_step = ModelStep(
             StepId.CreatePipelineModel.value,
-            instance_type=train_instance_type,
+            instance_type=instance_type,
             model=preprocessor_model,
             model_name=default_name
         )
-        pipeline_model_step.parameters = self.pipeline_model_config(train_instance_type, pipeline_model)
+        pipeline_model_step.parameters = self.pipeline_model_config(instance_type, pipeline_model)

-        deployable_model = Model(model_data='', image='')
+        deployable_model = Model(model_data='', image_uri='')

         # Deployment
         endpoint_config_step = EndpointConfigStep(
             StepId.ConfigureEndpoint.value,
             endpoint_config_name=default_name,
             model_name=default_name,
-            initial_instance_count=train_instance_count,
-            instance_type=train_instance_type
+            initial_instance_count=instance_count,
+            instance_type=instance_type
         )

         deploy_step = EndpointStep(
```
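The attribute reads here follow the SageMaker SDK v2 rename of the estimator constructor arguments `train_instance_type`/`train_instance_count` to `instance_type`/`instance_count`. A sketch of the rename on a generic estimator; the image URI and role ARN are placeholders:

```python
from sagemaker.estimator import Estimator

estimator = Estimator(
    image_uri="123456789012.dkr.ecr.us-east-1.amazonaws.com/my-image:latest",  # placeholder
    role="arn:aws:iam::123456789012:role/SageMakerRole",                       # placeholder
    instance_count=1,             # was train_instance_count in SDK v1
    instance_type="ml.m5.large",  # was train_instance_type in SDK v1
)

# The pipeline templates can now read the v2 attribute names directly:
assert estimator.instance_type == "ml.m5.large"
assert estimator.instance_count == 1
```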

Diff for: src/stepfunctions/template/pipeline/train.py (+7 -7)

```diff
@@ -43,8 +43,8 @@ def __init__(self, estimator, role, inputs, s3_bucket, client=None, **kwargs):
             inputs: Information about the training data. Please refer to the `fit()` method of the associated estimator, as this can take any of the following forms:

                 * (str) - The S3 location where training data is saved.
-                * (dict[str, str] or dict[str, `sagemaker.session.s3_input`]) - If using multiple channels for training data, you can specify a dict mapping channel names to strings or `sagemaker.session.s3_input` objects.
-                * (`sagemaker.session.s3_input`) - Channel configuration for S3 data sources that can provide additional information about the training dataset. See `sagemaker.session.s3_input` for full details.
+                * (dict[str, str] or dict[str, `sagemaker.inputs.TrainingInput`]) - If using multiple channels for training data, you can specify a dict mapping channel names to strings or `sagemaker.inputs.TrainingInput` objects.
+                * (`sagemaker.inputs.TrainingInput`) - Channel configuration for S3 data sources that can provide additional information about the training dataset. See `sagemaker.inputs.TrainingInput` for full details.
                 * (`sagemaker.amazon.amazon_estimator.RecordSet`) - A collection of Amazon `Record` objects serialized and stored in S3. For use with an estimator for an Amazon algorithm.
                 * (list[`sagemaker.amazon.amazon_estimator.RecordSet`]) - A list of `sagemaker.amazon.amazon_estimator.RecordSet` objects, where each instance is a different channel of training data.
             s3_bucket (str): S3 bucket under which the output artifacts from the training job will be stored. The parent path used is built using the format: ``s3://{s3_bucket}/{pipeline_name}/models/{job_name}/``. In this format, `pipeline_name` refers to the keyword argument provided for TrainingPipeline. If a `pipeline_name` argument was not provided, one is auto-generated by the pipeline as `training-pipeline-<timestamp>`. Also, in the format, `job_name` refers to the job name provided when calling the :meth:`TrainingPipeline.run()` method.
@@ -79,8 +79,8 @@ def build_workflow_definition(self):
         """
         default_name = self.pipeline_name

-        train_instance_type = self.estimator.train_instance_type
-        train_instance_count = self.estimator.train_instance_count
+        instance_type = self.estimator.instance_type
+        instance_count = self.estimator.instance_count

         training_step = TrainingStep(
             StepId.Train.value,
@@ -92,7 +92,7 @@ def build_workflow_definition(self):
         model = self.estimator.create_model()
         model_step = ModelStep(
             StepId.CreateModel.value,
-            instance_type=train_instance_type,
+            instance_type=instance_type,
             model=model,
             model_name=default_name
         )
@@ -101,8 +101,8 @@ def build_workflow_definition(self):
             StepId.ConfigureEndpoint.value,
             endpoint_config_name=default_name,
             model_name=default_name,
-            initial_instance_count=train_instance_count,
-            instance_type=train_instance_type
+            initial_instance_count=instance_count,
+            instance_type=instance_type
         )
         deploy_step = EndpointStep(
             StepId.Deploy.value,
```
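End to end, the template is consumed the same way as before; only the estimator construction changes. A hedged sketch of driving `TrainingPipeline` with a v2-style estimator; the role ARNs, bucket, and S3 input below are placeholders:

```python
from sagemaker.amazon.pca import PCA
from stepfunctions.template.pipeline import TrainingPipeline

# v2-style estimator: instance_* instead of train_instance_*.
pca_estimator = PCA(
    role="arn:aws:iam::123456789012:role/SageMakerRole",  # placeholder
    num_components=1,
    instance_count=1,
    instance_type="ml.m5.large",
)

pipeline = TrainingPipeline(
    estimator=pca_estimator,
    role="arn:aws:iam::123456789012:role/StepFunctionsWorkflowRole",  # placeholder
    inputs="s3://my-bucket/train/",  # placeholder; accepts the forms listed above
    s3_bucket="my-bucket",           # placeholder output bucket
)
pipeline.create()   # provisions the Step Functions state machine
pipeline.execute()  # starts a workflow execution
```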

Diff for: tests/integ/conftest.py (+2 -2)

```diff
@@ -53,8 +53,8 @@ def sagemaker_role_arn(aws_account_id):
 def pca_estimator_fixture(sagemaker_role_arn):
     estimator = pca.PCA(
         role=sagemaker_role_arn,
-        train_instance_count=1,
-        train_instance_type="ml.m5.large",
+        instance_count=1,
+        instance_type="ml.m5.large",
         num_components=48
     )
     return estimator
```

Diff for: tests/integ/test_inference_pipeline.py (+6 -2)

```diff
@@ -43,9 +43,11 @@ def sklearn_preprocessor(sagemaker_role_arn, sagemaker_session):
         'one_p_mnist',
         'sklearn_mnist_preprocessor.py')
     sklearn_preprocessor = SKLearn(
+        framework_version='0.20.0',
+        py_version='py3',
         entry_point=script_path,
         role=sagemaker_role_arn,
-        train_instance_type="ml.m5.large",
+        instance_type="ml.m5.large",
         sagemaker_session=sagemaker_session,
         hyperparameters={"epochs": 1},
     )
@@ -58,9 +60,11 @@ def sklearn_estimator(sagemaker_role_arn, sagemaker_session):
         'one_p_mnist',
         'sklearn_mnist_estimator.py')
     sklearn_estimator = SKLearn(
+        framework_version='0.20.0',
+        py_version='py3',
         entry_point=script_path,
         role=sagemaker_role_arn,
-        train_instance_type="ml.m5.large",
+        instance_type="ml.m5.large",
         sagemaker_session=sagemaker_session,
         hyperparameters={"epochs": 1},
         input_mode='File'
```
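Besides the `instance_type` rename, these fixtures now pin `framework_version` (and, for clarity, `py_version`): SageMaker SDK v2 removed the implicit framework-version defaults, so framework estimators must state a version explicitly. A sketch mirroring the fixture; the script path and role ARN are placeholders:

```python
from sagemaker.sklearn.estimator import SKLearn

sklearn_estimator = SKLearn(
    framework_version="0.20.0",  # required explicitly under SDK v2
    py_version="py3",
    entry_point="sklearn_mnist_estimator.py",             # placeholder script
    role="arn:aws:iam::123456789012:role/SageMakerRole",  # placeholder role
    instance_type="ml.m5.large",
    hyperparameters={"epochs": 1},
)
```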

Diff for: tests/integ/test_sagemaker_steps.py (+2 -2)

```diff
@@ -254,8 +254,8 @@ def test_tuning_step(sfn_client, record_set_for_hyperparameter_tuning, sagemaker

     kmeans = KMeans(
         role=sagemaker_role_arn,
-        train_instance_count=1,
-        train_instance_type=INSTANCE_TYPE,
+        instance_count=1,
+        instance_type=INSTANCE_TYPE,
         k=10
     )
```
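This fixture feeds the tuning-step test. A sketch of the surrounding wiring with an illustrative tuner configuration; the objective metric, range, job limits, role ARN, and `record_set` are assumptions, not taken from the commit:

```python
from sagemaker.amazon.kmeans import KMeans
from sagemaker.tuner import HyperparameterTuner, IntegerParameter
from stepfunctions.steps import TuningStep

kmeans = KMeans(
    role="arn:aws:iam::123456789012:role/SageMakerRole",  # placeholder
    instance_count=1,
    instance_type="ml.m5.large",
    k=10,
)

# Illustrative tuner: metric name and range are assumptions for this sketch.
tuner = HyperparameterTuner(
    estimator=kmeans,
    objective_metric_name="test:msd",
    objective_type="Minimize",
    hyperparameter_ranges={"extra_center_factor": IntegerParameter(2, 10)},
    max_jobs=2,
    max_parallel_jobs=2,
)

tuning_step = TuningStep(
    "Tuning",
    tuner=tuner,
    job_name="kmeans-tuning",  # placeholder job name
    data=record_set,           # placeholder RecordSet of training data
)
```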

Diff for: tests/integ/test_state_machine_definition.py (+2 -2)

```diff
@@ -16,7 +16,7 @@
 import json

 from sagemaker.utils import unique_name_from_base
-from sagemaker.amazon.amazon_estimator import get_image_uri
+from sagemaker.image_uris import retrieve
 from stepfunctions import steps
 from stepfunctions.workflow import Workflow
 from tests.integ.utils import state_machine_delete_wait
@@ -25,7 +25,7 @@
 def training_job_parameters(sagemaker_session, sagemaker_role_arn, record_set_fixture):
     parameters = {
         "AlgorithmSpecification": {
-            "TrainingImage": get_image_uri(sagemaker_session.boto_session.region_name, 'pca'),
+            "TrainingImage": retrieve(region=sagemaker_session.boto_session.region_name, framework='pca'),
             "TrainingInputMode": "File"
         },
         "OutputDataConfig": {
```

Diff for: tests/integ/test_training_pipeline_estimators.py (+8 -4)

```diff
@@ -25,7 +25,7 @@

 # import Sagemaker
 from sagemaker.amazon.pca import PCA
-from sagemaker.amazon.amazon_estimator import get_image_uri
+from sagemaker.image_uris import retrieve

 # import StepFunctions
 from stepfunctions.template.pipeline import TrainingPipeline
@@ -50,8 +50,8 @@ def pca_estimator(sagemaker_role_arn):
     pca_estimator = PCA(
         role=sagemaker_role_arn,
         num_components=1,
-        train_instance_count=1,
-        train_instance_type='ml.m5.large',
+        instance_count=1,
+        instance_type='ml.m5.large',
     )

     pca_estimator.feature_dim=500
@@ -105,7 +105,7 @@ def test_pca_estimator(sfn_client, sagemaker_session, sagemaker_role_arn, sfn_ro
     job_name = workflow_execution_info['name']
     s3_manifest_uri = inputs.s3_data
     status = 'SUCCEEDED'
-    estimator_image_uri = get_image_uri(sagemaker_session.boto_region_name, 'pca')
+    estimator_image_uri = retrieve(region=sagemaker_session.boto_region_name, framework='pca')

     execution_info = sfn_client.describe_execution(executionArn=execution_arn)
     execution_info['input'] = json.loads(execution_info['input'])
@@ -115,10 +115,14 @@ def test_pca_estimator(sfn_client, sagemaker_session, sagemaker_role_arn, sfn_ro
     s3_output_path = 's3://{bucket_name}/{workflow_name}/models'.format(bucket_name=bucket_name, workflow_name=unique_name)
     expected_execution_info = {'executionArn': execution_arn,
                                'stateMachineArn': state_machine_arn,
+                               'inputDetails': {'included': True},
                                'name': job_name,
+                               'outputDetails': {'included': True},
                                'status': status,
                                'startDate': execution_info['startDate'],
                                'stopDate': execution_info['stopDate'],
+                               'inputDetails': {'included': True},
+                               'outputDetails': {'included': True},
                                'input': {'Training': {'AlgorithmSpecification': {'TrainingImage': estimator_image_uri,
                                                                                  'TrainingInputMode': 'File'},
                                                       'OutputDataConfig': {'S3OutputPath': s3_output_path},
```
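Two notes on this last hunk. The new keys track newer Step Functions `DescribeExecution` responses, which report whether the input and output payloads are included via `inputDetails`/`outputDetails`; and since a Python dict literal keeps only the last value for a repeated key, the duplicated entries the merge introduced are redundant but harmless. A hedged boto3 sketch of the response shape; the region and execution ARN are placeholders:

```python
import boto3

sfn_client = boto3.client("stepfunctions", region_name="us-east-1")  # placeholder region

execution_info = sfn_client.describe_execution(
    executionArn="arn:aws:states:us-east-1:123456789012:execution:wf:run"  # placeholder ARN
)

# Newer API versions include these payload-detail fields in the response:
print(execution_info["inputDetails"])   # e.g. {'included': True}
print(execution_info["outputDetails"])  # e.g. {'included': True}
```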
