Skip to content
This repository was archived by the owner on Nov 29, 2023. It is now read-only.

Commit f2a8017

Browse files
authored
feat: added dataproc workflows samples [(#3056)](GoogleCloudPlatform/python-docs-samples#3056)
* Added workflows sample
1 parent a0ca0b2 commit f2a8017

File tree

2 files changed

+129
-0
lines changed

2 files changed

+129
-0
lines changed
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
# Copyright 2020 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
# This sample walks a user through instantiating an inline
16+
# workflow for Cloud Dataproc using the Python client library.
17+
#
18+
# This script can be run on its own:
19+
# python workflows.py ${PROJECT_ID} ${REGION}
20+
21+
import sys
22+
# [START dataproc_instantiate_inline_workflow_template]
23+
from google.cloud import dataproc_v1 as dataproc
24+
25+
26+
def instantiate_inline_workflow_template(project_id, region):
27+
"""This sample walks a user through submitting a workflow
28+
for a Cloud Dataproc using the Python client library.
29+
30+
Args:
31+
project_id (string): Project to use for running the workflow.
32+
region (string): Region where the workflow resources should live.
33+
"""
34+
35+
# Create a client with the endpoint set to the desired region.
36+
workflow_template_client = dataproc.WorkflowTemplateServiceClient(
37+
client_options={
38+
'api_endpoint': '{}-dataproc.googleapis.com:443'.format(region)}
39+
)
40+
41+
parent = workflow_template_client.region_path(project_id, region)
42+
43+
template = {
44+
'jobs': [
45+
{
46+
'hadoop_job': {
47+
'main_jar_file_uri': 'file:///usr/lib/hadoop-mapreduce/'
48+
'hadoop-mapreduce-examples.jar',
49+
'args': [
50+
'teragen',
51+
'1000',
52+
'hdfs:///gen/'
53+
]
54+
},
55+
'step_id': 'teragen'
56+
},
57+
{
58+
'hadoop_job': {
59+
'main_jar_file_uri': 'file:///usr/lib/hadoop-mapreduce/'
60+
'hadoop-mapreduce-examples.jar',
61+
'args': [
62+
'terasort',
63+
'hdfs:///gen/',
64+
'hdfs:///sort/'
65+
]
66+
},
67+
'step_id': 'terasort',
68+
'prerequisite_step_ids': [
69+
'teragen'
70+
]
71+
}],
72+
'placement': {
73+
'managed_cluster': {
74+
'cluster_name': 'my-managed-cluster',
75+
'config': {
76+
'gce_cluster_config': {
77+
# Leave 'zone_uri' empty for 'Auto Zone Placement'
78+
# 'zone_uri': ''
79+
'zone_uri': 'us-central1-a'
80+
}
81+
}
82+
}
83+
}
84+
}
85+
86+
# Submit the request to instantiate the workflow from an inline template.
87+
operation = workflow_template_client.instantiate_inline_workflow_template(
88+
parent, template
89+
)
90+
operation.result()
91+
92+
# Output a success message.
93+
print('Workflow ran successfully.')
94+
# [END dataproc_instantiate_inline_workflow_template]
95+
96+
97+
if __name__ == "__main__":
98+
instantiate_inline_workflow_template(sys.argv[1], sys.argv[2])
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# Copyright 2020 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import os
16+
17+
import instantiate_inline_workflow_template
18+
19+
20+
PROJECT_ID = os.environ['GCLOUD_PROJECT']
21+
REGION = 'us-central1'
22+
23+
24+
def test_workflows(capsys):
25+
# Wrapper function for client library function
26+
instantiate_inline_workflow_template.instantiate_inline_workflow_template(
27+
PROJECT_ID, REGION
28+
)
29+
30+
out, _ = capsys.readouterr()
31+
assert "successfully" in out

0 commit comments

Comments
 (0)