|
15 | 15 | # limitations under the License.
|
16 | 16 |
|
17 | 17 | # [START dataproc_quickstart]
|
| 18 | +"""This quickstart sample walks a user through creating a Cloud Dataproc |
| 19 | + cluster, submitting a PySpark job from Google Cloud Storage to the |
| 20 | + cluster, reading the output of the job and deleting the cluster, all |
| 21 | + using the Python client library. |
| 22 | +
|
| 23 | + Usage: |
| 24 | + python3 quickstart.py --project_id <PROJECT_ID> --region <REGION> \ |
| 25 | + --cluster_name <CLUSTER_NAME> --job_file_path <GCS_JOB_FILE_PATH> |
| 26 | +""" |
| 27 | + |
| 28 | +import argparse |
18 | 29 | import time
|
19 | 30 |
|
20 | 31 | from google.cloud import dataproc_v1 as dataproc
|
21 | 32 | from google.cloud import storage
|
22 | 33 |
|
23 | 34 |
|
24 | 35 | def quickstart(project_id, region, cluster_name, job_file_path):
|
25 |
| - """This quickstart sample walks a user through creating a Cloud Dataproc |
26 |
| - cluster, submitting a PySpark job from Google Cloud Storage to the |
27 |
| - cluster, reading the output of the job and deleting the cluster, all |
28 |
| - using the Python client library. |
29 |
| -
|
30 |
| - Args: |
31 |
| - project_id (string): Project to use for creating resources. |
32 |
| - region (string): Region where the resources should live. |
33 |
| - cluster_name (string): Name to use for creating a cluster. |
34 |
| - job_file_path (string): Job in GCS to execute against the cluster. |
35 |
| - """ |
36 |
| - |
37 | 36 | # Create the cluster client.
|
38 | 37 | cluster_client = dataproc.ClusterControllerClient(client_options={
|
39 | 38 | 'api_endpoint': '{}-dataproc.googleapis.com:443'.format(region)
|
@@ -125,4 +124,23 @@ def quickstart(project_id, region, cluster_name, job_file_path):
|
125 | 124 | operation.result()
|
126 | 125 |
|
127 | 126 | print('Cluster {} successfully deleted.'.format(cluster_name))
|
128 |
| - # [END dataproc_quickstart] |
| 127 | + |
| 128 | + |
if __name__ == "__main__":
    # Command-line entry point. The module docstring doubles as the --help
    # description; RawDescriptionHelpFormatter keeps its line breaks (and the
    # multi-line usage example) exactly as written.
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    # Every flag is required: argparse defaults a missing option to None, and
    # that None would otherwise be handed straight to quickstart(). Marking
    # the flags required makes argparse exit with a usage error instead.
    parser.add_argument('--project_id', type=str, required=True,
                        help='Project to use for creating resources.')
    parser.add_argument('--region', type=str, required=True,
                        help='Region where the resources should live.')
    parser.add_argument('--cluster_name', type=str, required=True,
                        help='Name to use for creating a cluster')
    parser.add_argument('--job_file_path', type=str, required=True,
                        help='Job in GCS to execute against the cluster.')

    args = parser.parse_args()
    quickstart(args.project_id, args.region,
               args.cluster_name, args.job_file_path)
| 146 | +# [END dataproc_quickstart] |
0 commit comments