From d672d7d0ff0167772e6ae888cc982131381c9a35 Mon Sep 17 00:00:00 2001 From: Shiv Lakshminarayan Date: Mon, 14 Jun 2021 14:31:16 -0700 Subject: [PATCH 1/4] fix: supplying hyperparameters to training step constructor drops hyperparameters specified in estimator --- src/stepfunctions/steps/sagemaker.py | 6 +- tests/unit/test_sagemaker_steps.py | 129 +++++++++++++++++++++++++++ 2 files changed, 134 insertions(+), 1 deletion(-) diff --git a/src/stepfunctions/steps/sagemaker.py b/src/stepfunctions/steps/sagemaker.py index deb5176..daf623c 100644 --- a/src/stepfunctions/steps/sagemaker.py +++ b/src/stepfunctions/steps/sagemaker.py @@ -104,7 +104,11 @@ def __init__(self, state_id, estimator, job_name, data=None, hyperparameters=Non parameters['TrainingJobName'] = job_name if hyperparameters is not None: - parameters['HyperParameters'] = hyperparameters + merged_hyperparameters = {} + if estimator.hyperparameters() is not None: + merged_hyperparameters.update(estimator.hyperparameters()) + merged_hyperparameters.update(hyperparameters) + parameters['HyperParameters'] = merged_hyperparameters if experiment_config is not None: parameters['ExperimentConfig'] = experiment_config diff --git a/tests/unit/test_sagemaker_steps.py b/tests/unit/test_sagemaker_steps.py index 2ce9d3b..00f016e 100644 --- a/tests/unit/test_sagemaker_steps.py +++ b/tests/unit/test_sagemaker_steps.py @@ -482,6 +482,135 @@ def test_training_step_creation_with_framework(tensorflow_estimator): 'End': True } +@patch('botocore.client.BaseClient._make_api_call', new=mock_boto_api_call) +@patch.object(boto3.session.Session, 'region_name', 'us-east-1') +def training_step_merges_hyperparameters_from_constructor_and_estimator(tensorflow_estimator): + step = TrainingStep('Training', + estimator=tensorflow_estimator, + data={'train': 's3://sagemaker/train'}, + job_name='tensorflow-job', + mini_batch_size=1024, + hyperparameters={ + 'key': 'value' + } + ) + + assert step.to_dict() == { + 'Type': 'Task', + 'Parameters': { + 'AlgorithmSpecification': { + 'TrainingImage': TENSORFLOW_IMAGE, + 'TrainingInputMode': 'File' + }, + 'InputDataConfig': [ + { + 'DataSource': { + 'S3DataSource': { + 'S3DataDistributionType': 'FullyReplicated', + 'S3DataType': 'S3Prefix', + 'S3Uri': 's3://sagemaker/train' + } + }, + 'ChannelName': 'train' + } + ], + 'OutputDataConfig': { + 'S3OutputPath': 's3://sagemaker/models' + }, + 'DebugHookConfig': { + 'S3OutputPath': 's3://sagemaker/models/debug' + }, + 'StoppingCondition': { + 'MaxRuntimeInSeconds': 86400 + }, + 'ResourceConfig': { + 'InstanceCount': 1, + 'InstanceType': 'ml.p2.xlarge', + 'VolumeSizeInGB': 30 + }, + 'RoleArn': EXECUTION_ROLE, + 'HyperParameters': { + 'checkpoint_path': '"s3://sagemaker/models/sagemaker-tensorflow/checkpoints"', + 'evaluation_steps': '100', + 'key': 'value', + 'sagemaker_container_log_level': '20', + 'sagemaker_job_name': '"tensorflow-job"', + 'sagemaker_program': '"tf_train.py"', + 'sagemaker_region': '"us-east-1"', + 'sagemaker_submit_directory': '"s3://sagemaker/source"', + 'training_steps': '1000', + }, + 'TrainingJobName': 'tensorflow-job', + }, + 'Resource': 'arn:aws:states:::sagemaker:createTrainingJob.sync', + 'End': True +} + + +@patch('botocore.client.BaseClient._make_api_call', new=mock_boto_api_call) +@patch.object(boto3.session.Session, 'region_name', 'us-east-1') +def training_step_uses_constructor_hyperparameters_when_duplicates_supplied_in_estimator(tensorflow_estimator): + step = TrainingStep('Training', + estimator=tensorflow_estimator, + data={'train': 's3://sagemaker/train'}, + job_name='tensorflow-job', + mini_batch_size=1024, + hyperparameters={ + # set as 1000 in estimator + 'training_steps': '500' + } + ) + + assert step.to_dict() == { + 'Type': 'Task', + 'Parameters': { + 'AlgorithmSpecification': { + 'TrainingImage': TENSORFLOW_IMAGE, + 'TrainingInputMode': 'File' + }, + 'InputDataConfig': [ + { + 'DataSource': { + 'S3DataSource': { + 'S3DataDistributionType': 'FullyReplicated', + 'S3DataType': 'S3Prefix', + 'S3Uri': 's3://sagemaker/train' + } + }, + 'ChannelName': 'train' + } + ], + 'OutputDataConfig': { + 'S3OutputPath': 's3://sagemaker/models' + }, + 'DebugHookConfig': { + 'S3OutputPath': 's3://sagemaker/models/debug' + }, + 'StoppingCondition': { + 'MaxRuntimeInSeconds': 86400 + }, + 'ResourceConfig': { + 'InstanceCount': 1, + 'InstanceType': 'ml.p2.xlarge', + 'VolumeSizeInGB': 30 + }, + 'RoleArn': EXECUTION_ROLE, + 'HyperParameters': { + 'checkpoint_path': '"s3://sagemaker/models/sagemaker-tensorflow/checkpoints"', + 'evaluation_steps': '100', + 'sagemaker_container_log_level': '20', + 'sagemaker_job_name': '"tensorflow-job"', + 'sagemaker_program': '"tf_train.py"', + 'sagemaker_region': '"us-east-1"', + 'sagemaker_submit_directory': '"s3://sagemaker/source"', + 'training_steps': '500', + }, + 'TrainingJobName': 'tensorflow-job', + }, + 'Resource': 'arn:aws:states:::sagemaker:createTrainingJob.sync', + 'End': True + } + @patch.object(boto3.session.Session, 'region_name', 'us-east-1') def test_transform_step_creation(pca_transformer): From efeaf21cf8dbb4890919b319bb07df3f80a2840c Mon Sep 17 00:00:00 2001 From: Shiv Lakshminarayan Date: Wed, 16 Jun 2021 22:24:35 -0700 Subject: [PATCH 2/4] feedback and update tests --- src/stepfunctions/steps/sagemaker.py | 32 +++++++++++++++++++++++----- tests/unit/test_sagemaker_steps.py | 4 ++-- 2 files changed, 29 insertions(+), 7 deletions(-) diff --git a/src/stepfunctions/steps/sagemaker.py b/src/stepfunctions/steps/sagemaker.py index daf623c..d4a5c6b 100644 --- a/src/stepfunctions/steps/sagemaker.py +++ b/src/stepfunctions/steps/sagemaker.py @@ -12,6 +12,8 @@ # permissions and limitations under the License. from __future__ import absolute_import +import logging + from enum import Enum from stepfunctions.inputs import ExecutionInput, StepInput from stepfunctions.steps.states import Task @@ -23,6 +25,8 @@ from sagemaker.model import Model, FrameworkModel from sagemaker.model_monitor import DataCaptureConfig +logger = logging.getLogger('stepfunctions.sagemaker') + SAGEMAKER_SERVICE_NAME = "sagemaker" @@ -64,7 +68,7 @@ def __init__(self, state_id, estimator, job_name, data=None, hyperparameters=Non * (list[sagemaker.amazon.amazon_estimator.RecordSet]) - A list of :class:`sagemaker.amazon.amazon_estimator.RecordSet` objects, where each instance is a different channel of training data. - hyperparameters (dict, optional): Specify the hyper parameters for the training. (Default: None) + hyperparameters (dict, optional): Specify the hyperparameters that are set before the model begins training. If hyperparameters provided are also specified in the estimator, the provided value will used. (Default: Hyperparameters specified in the estimator will be used for training.) mini_batch_size (int): Specify this argument only when estimator is a built-in estimator of an Amazon algorithm. For other estimators, batch size should be specified in the estimator. experiment_config (dict, optional): Specify the experiment config for the training. (Default: None) wait_for_completion (bool, optional): Boolean value set to `True` if the Task state should wait for the training job to complete before proceeding to the next step in the workflow. Set to `False` if the Task state should submit the training job and proceed to the next step. (default: True) @@ -104,11 +108,9 @@ def __init__(self, state_id, estimator, job_name, data=None, hyperparameters=Non parameters['TrainingJobName'] = job_name if hyperparameters is not None: - merged_hyperparameters = {} if estimator.hyperparameters() is not None: - merged_hyperparameters.update(estimator.hyperparameters()) - merged_hyperparameters.update(hyperparameters) - parameters['HyperParameters'] = merged_hyperparameters + hyperparameters = self.__merge_hyperparameters(hyperparameters, estimator.hyperparameters()) + parameters['HyperParameters'] = hyperparameters if experiment_config is not None: parameters['ExperimentConfig'] = experiment_config @@ -139,6 +141,26 @@ def get_expected_model(self, model_name=None): model.model_data = self.output()["ModelArtifacts"]["S3ModelArtifacts"] return model + """ + Merges the hyperparameters supplied in the TrainingStep constructor with the hyperparameters + specified in the estimator. If there are duplicate entries, the value provided in the constructor + will be used. + """ + + def __merge_hyperparameters(self, training_step_hyperparameters, estimator_hyperparameters): + """ + Args: + training_step_hyperparameters (dict): Hyperparameters supplied in the training step constructor + estimator_hyperparameters (dict): Hyperparameters specified in the estimator + """ + merged_hyperparameters = estimator_hyperparameters.copy() + for key, value in training_step_hyperparameters.items(): + if key in merged_hyperparameters: + logger.info( + f"hyperparameter property: <{key}> with value: <{merged_hyperparameters[key]}> provided in the" + f" estimator will be overwritten with value provided in constructor: <{value}>") + merged_hyperparameters[key] = value + return merged_hyperparameters class TransformStep(Task): diff --git a/tests/unit/test_sagemaker_steps.py b/tests/unit/test_sagemaker_steps.py index 00f016e..7645f85 100644 --- a/tests/unit/test_sagemaker_steps.py +++ b/tests/unit/test_sagemaker_steps.py @@ -484,7 +484,7 @@ def test_training_step_creation_with_framework(tensorflow_estimator): @patch('botocore.client.BaseClient._make_api_call', new=mock_boto_api_call) @patch.object(boto3.session.Session, 'region_name', 'us-east-1') -def training_step_merges_hyperparameters_from_constructor_and_estimator(tensorflow_estimator): +def test_training_step_merges_hyperparameters_from_constructor_and_estimator(tensorflow_estimator): step = TrainingStep('Training', estimator=tensorflow_estimator, data={'train': 's3://sagemaker/train'}, @@ -549,7 +549,7 @@ def training_step_merges_hyperparameters_from_constructor_and_estimator(tensorfl @patch('botocore.client.BaseClient._make_api_call', new=mock_boto_api_call) @patch.object(boto3.session.Session, 'region_name', 'us-east-1') -def training_step_uses_constructor_hyperparameters_when_duplicates_supplied_in_estimator(tensorflow_estimator): +def test_training_step_uses_constructor_hyperparameters_when_duplicates_supplied_in_estimator(tensorflow_estimator): step = TrainingStep('Training', estimator=tensorflow_estimator, data={'train': 's3://sagemaker/train'}, From d34128ba99345f94a473286f74913be5a4b63b1c Mon Sep 17 00:00:00 2001 From: Shiv Lakshminarayan Date: Thu, 17 Jun 2021 14:20:33 -0700 Subject: [PATCH 3/4] clear up the documentation --- src/stepfunctions/steps/sagemaker.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/stepfunctions/steps/sagemaker.py b/src/stepfunctions/steps/sagemaker.py index d4a5c6b..7163674 100644 --- a/src/stepfunctions/steps/sagemaker.py +++ b/src/stepfunctions/steps/sagemaker.py @@ -68,7 +68,9 @@ def __init__(self, state_id, estimator, job_name, data=None, hyperparameters=Non * (list[sagemaker.amazon.amazon_estimator.RecordSet]) - A list of :class:`sagemaker.amazon.amazon_estimator.RecordSet` objects, where each instance is a different channel of training data. - hyperparameters (dict, optional): Specify the hyperparameters that are set before the model begins training. If hyperparameters provided are also specified in the estimator, the provided value will used. (Default: Hyperparameters specified in the estimator will be used for training.) + hyperparameters (dict, optional): Parameters used for training. + Hyperparameters supplied will be merged with the Hyperparameters specified in the estimator. + If there are duplicate entries, the value provided through this property will be used. (Default: Hyperparameters specified in the estimator.) mini_batch_size (int): Specify this argument only when estimator is a built-in estimator of an Amazon algorithm. For other estimators, batch size should be specified in the estimator. experiment_config (dict, optional): Specify the experiment config for the training. (Default: None) wait_for_completion (bool, optional): Boolean value set to `True` if the Task state should wait for the training job to complete before proceeding to the next step in the workflow. Set to `False` if the Task state should submit the training job and proceed to the next step. (default: True) From c431f7cf3415f391d54026584fc7577d513a69c3 Mon Sep 17 00:00:00 2001 From: Shiv Lakshminarayan Date: Thu, 17 Jun 2021 22:40:23 -0700 Subject: [PATCH 4/4] dummy commit: remove extra space --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 2799dd7..b6f6d72 100644 --- a/README.rst +++ b/README.rst @@ -17,7 +17,7 @@ to provision and integrate the AWS services separately. The AWS Step Functions Data Science SDK enables you to do the following. - Easily construct and run machine learning workflows that use AWS - infrastructure directly in Python + infrastructure directly in Python - Instantiate common training pipelines - Create standard machine learning workflows in a Jupyter notebook from templates