Commit 14cf125

docs: update cluster sample (#218)

Authored by loferris, gcf-owl-bot[bot], and busunkim96
* first steps in adding sample
* consistent formatting with create_cluster.py
* test first draft
* update_cluster sample complete
* 🦉 Updates from OwlBot. See https://github.com/googleapis/repo-automation-bots/blob/master/packages/owl-bot/README.md
* docs: add update cluster sample - fixing formatting
* Update samples/snippets/update_cluster.py (Co-authored-by: Bu Sun Kim <[email protected]>)
* Update samples/snippets/update_cluster.py (Co-authored-by: Bu Sun Kim <[email protected]>)
* updated test, still fine-tuning
* added get_cluster to test
* 🦉 Updates from OwlBot. See https://github.com/googleapis/repo-automation-bots/blob/master/packages/owl-bot/README.md
* another attempt at writing test
* new test pattern
* 🦉 Updates from OwlBot. See https://github.com/googleapis/repo-automation-bots/blob/master/packages/owl-bot/README.md
* updated static for new_num_instances and fixed linting error

Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
Co-authored-by: Bu Sun Kim <[email protected]>
1 parent c8ba31b commit 14cf125

8 files changed, +212 -48 lines changed

dataproc/snippets/create_cluster.py

Lines changed: 5 additions & 5 deletions
@@ -29,12 +29,12 @@
 
 def create_cluster(project_id, region, cluster_name):
     """This sample walks a user through creating a Cloud Dataproc cluster
-        using the Python client library.
+    using the Python client library.
 
-        Args:
-            project_id (string): Project to use for creating resources.
-            region (string): Region where the resources should live.
-            cluster_name (string): Name to use for creating a cluster.
+    Args:
+        project_id (string): Project to use for creating resources.
+        region (string): Region where the resources should live.
+        cluster_name (string): Name to use for creating a cluster.
     """
 
     # Create a client with the endpoint set to the desired cluster region.

dataproc/snippets/instantiate_inline_workflow_template.py

Lines changed: 4 additions & 4 deletions
@@ -27,11 +27,11 @@
 
 def instantiate_inline_workflow_template(project_id, region):
     """This sample walks a user through submitting a workflow
-        for a Cloud Dataproc using the Python client library.
+    for a Cloud Dataproc using the Python client library.
 
-        Args:
-            project_id (string): Project to use for running the workflow.
-            region (string): Region where the workflow resources should live.
+    Args:
+        project_id (string): Project to use for running the workflow.
+        region (string): Region where the workflow resources should live.
     """
 
     # Create a client with the endpoint set to the desired region.

dataproc/snippets/list_clusters.py

Lines changed: 4 additions & 2 deletions
@@ -49,8 +49,10 @@ def main(project_id, region):
     else:
         # Use a regional gRPC endpoint. See:
         # https://cloud.google.com/dataproc/docs/concepts/regional-endpoints
-        client_transport = cluster_controller_grpc_transport.ClusterControllerGrpcTransport(
-            address="{}-dataproc.googleapis.com:443".format(region)
+        client_transport = (
+            cluster_controller_grpc_transport.ClusterControllerGrpcTransport(
+                address="{}-dataproc.googleapis.com:443".format(region)
+            )
         )
     dataproc_cluster_client = dataproc_v1.ClusterControllerClient(client_transport)
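Note: the hunk above only re-wraps the legacy transport construction. For comparison, a minimal sketch of the equivalent client built with client_options (the pattern this commit already uses in submit_job.py; assumes google-cloud-dataproc 2.x, and the helper name is hypothetical, not part of the diff):

from google.cloud import dataproc_v1 as dataproc


def make_regional_cluster_client(region: str) -> dataproc.ClusterControllerClient:
    # Pin the client to a regional endpoint without building a gRPC
    # transport by hand.
    return dataproc.ClusterControllerClient(
        client_options={"api_endpoint": f"{region}-dataproc.googleapis.com:443"}
    )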

dataproc/snippets/submit_job.py

Lines changed: 13 additions & 11 deletions
@@ -23,8 +23,10 @@
 
 # [START dataproc_submit_job]
 import re
+
 # [END dataproc_submit_job]
 import sys
+
 # [START dataproc_submit_job]
 
 from google.cloud import dataproc_v1 as dataproc
@@ -33,21 +35,19 @@
 
 def submit_job(project_id, region, cluster_name):
     # Create the job client.
-    job_client = dataproc.JobControllerClient(client_options={
-        'api_endpoint': '{}-dataproc.googleapis.com:443'.format(region)
-    })
+    job_client = dataproc.JobControllerClient(
+        client_options={"api_endpoint": "{}-dataproc.googleapis.com:443".format(region)}
+    )
 
     # Create the job config. 'main_jar_file_uri' can also be a
     # Google Cloud Storage URL.
     job = {
-        'placement': {
-            'cluster_name': cluster_name
+        "placement": {"cluster_name": cluster_name},
+        "spark_job": {
+            "main_class": "org.apache.spark.examples.SparkPi",
+            "jar_file_uris": ["file:///usr/lib/spark/examples/jars/spark-examples.jar"],
+            "args": ["1000"],
         },
-        'spark_job': {
-            'main_class': 'org.apache.spark.examples.SparkPi',
-            'jar_file_uris': ['file:///usr/lib/spark/examples/jars/spark-examples.jar'],
-            'args': ['1000']
-        }
     }
 
     operation = job_client.submit_job_as_operation(
@@ -67,12 +67,14 @@ def submit_job(project_id, region, cluster_name):
     )
 
     print(f"Job finished successfully: {output}")
+
+
 # [END dataproc_submit_job]
 
 
 if __name__ == "__main__":
     if len(sys.argv) < 3:
-        sys.exit('python submit_job.py project_id region cluster_name')
+        sys.exit("python submit_job.py project_id region cluster_name")
 
     project_id = sys.argv[1]
     region = sys.argv[2]
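Note: submit_job_as_operation returns a long-running operation whose result() blocks until the job completes. A short sketch of the same SparkPi submission with an explicit deadline (not part of this commit; the 900-second timeout and function name are arbitrary choices):

from google.cloud import dataproc_v1 as dataproc


def submit_sparkpi(project_id: str, region: str, cluster_name: str) -> None:
    # Job client pinned to the cluster's regional endpoint.
    job_client = dataproc.JobControllerClient(
        client_options={"api_endpoint": f"{region}-dataproc.googleapis.com:443"}
    )
    # Same SparkPi job config as the sample above.
    job = {
        "placement": {"cluster_name": cluster_name},
        "spark_job": {
            "main_class": "org.apache.spark.examples.SparkPi",
            "jar_file_uris": ["file:///usr/lib/spark/examples/jars/spark-examples.jar"],
            "args": ["1000"],
        },
    }
    operation = job_client.submit_job_as_operation(
        request={"project_id": project_id, "region": region, "job": job}
    )
    # result() raises if the job fails or the deadline passes.
    job_result = operation.result(timeout=900)
    print(f"Job {job_result.reference.job_id} finished: {job_result.status.state.name}")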

dataproc/snippets/submit_job_test.py

Lines changed: 20 additions & 22 deletions
@@ -21,30 +21,24 @@
 import submit_job
 
 
-PROJECT_ID = os.environ['GOOGLE_CLOUD_PROJECT']
-REGION = 'us-central1'
-CLUSTER_NAME = 'py-sj-test-{}'.format(str(uuid.uuid4()))
+PROJECT_ID = os.environ["GOOGLE_CLOUD_PROJECT"]
+REGION = "us-central1"
+CLUSTER_NAME = "py-sj-test-{}".format(str(uuid.uuid4()))
 CLUSTER = {
-    'project_id': PROJECT_ID,
-    'cluster_name': CLUSTER_NAME,
-    'config': {
-        'master_config': {
-            'num_instances': 1,
-            'machine_type_uri': 'n1-standard-2'
-        },
-        'worker_config': {
-            'num_instances': 2,
-            'machine_type_uri': 'n1-standard-2'
-        }
-    }
+    "project_id": PROJECT_ID,
+    "cluster_name": CLUSTER_NAME,
+    "config": {
+        "master_config": {"num_instances": 1, "machine_type_uri": "n1-standard-2"},
+        "worker_config": {"num_instances": 2, "machine_type_uri": "n1-standard-2"},
+    },
 }
 
 
 @pytest.fixture(autouse=True)
 def setup_teardown():
-    cluster_client = dataproc.ClusterControllerClient(client_options={
-        'api_endpoint': '{}-dataproc.googleapis.com:443'.format(REGION)
-    })
+    cluster_client = dataproc.ClusterControllerClient(
+        client_options={"api_endpoint": "{}-dataproc.googleapis.com:443".format(REGION)}
+    )
 
     # Create the cluster.
     operation = cluster_client.create_cluster(
@@ -54,13 +48,17 @@ def setup_teardown():
 
     yield
 
-    cluster_client.delete_cluster(request={
-        "project_id": PROJECT_ID, "region": REGION, "cluster_name": CLUSTER_NAME
-    })
+    cluster_client.delete_cluster(
+        request={
+            "project_id": PROJECT_ID,
+            "region": REGION,
+            "cluster_name": CLUSTER_NAME,
+        }
+    )
 
 
 def test_submit_job(capsys):
     submit_job.submit_job(PROJECT_ID, REGION, CLUSTER_NAME)
     out, _ = capsys.readouterr()
 
-    assert 'Job finished successfully' in out
+    assert "Job finished successfully" in out

dataproc/snippets/submit_job_to_cluster.py

Lines changed: 8 additions & 4 deletions
@@ -77,8 +77,10 @@ def download_output(project, cluster_id, output_bucket, job_id):
     print("Downloading output file.")
     client = storage.Client(project=project)
     bucket = client.get_bucket(output_bucket)
-    output_blob = "google-cloud-dataproc-metainfo/{}/jobs/{}/driveroutput.000000000".format(
-        cluster_id, job_id
+    output_blob = (
+        "google-cloud-dataproc-metainfo/{}/jobs/{}/driveroutput.000000000".format(
+            cluster_id, job_id
+        )
     )
     return bucket.blob(output_blob).download_as_string()
@@ -230,8 +232,10 @@ def main(
     region = get_region_from_zone(zone)
     # Use a regional gRPC endpoint. See:
     # https://cloud.google.com/dataproc/docs/concepts/regional-endpoints
-    client_transport = cluster_controller_grpc_transport.ClusterControllerGrpcTransport(
-        address="{}-dataproc.googleapis.com:443".format(region)
+    client_transport = (
+        cluster_controller_grpc_transport.ClusterControllerGrpcTransport(
+            address="{}-dataproc.googleapis.com:443".format(region)
+        )
     )
     job_transport = job_controller_grpc_transport.JobControllerGrpcTransport(
         address="{}-dataproc.googleapis.com:443".format(region)

dataproc/snippets/update_cluster.py

Lines changed: 78 additions & 0 deletions
@@ -0,0 +1,78 @@
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This sample walks a user through updating the number of worker instances in
+# a Cloud Dataproc cluster using the Dataproc client library.
+
+# Usage:
+#     python update_cluster.py <PROJECT_ID> <REGION> <CLUSTER_NAME> <NEW_NUM_INSTANCES>
+
+import sys
+
+# [START dataproc_update_cluster]
+from google.cloud import dataproc_v1 as dataproc
+
+
+def update_cluster(project_id, region, cluster_name, new_num_instances):
+    """This sample walks a user through updating a Cloud Dataproc cluster
+    using the Python client library.
+
+    Args:
+        project_id (str): Project to use for creating resources.
+        region (str): Region where the resources should live.
+        cluster_name (str): Name of the cluster to update.
+        new_num_instances (int): New number of worker instances.
+    """
+
+    # Create a client with the endpoint set to the desired cluster region.
+    client = dataproc.ClusterControllerClient(
+        client_options={"api_endpoint": f"{region}-dataproc.googleapis.com:443"}
+    )
+
+    # Get the cluster you wish to update.
+    cluster = client.get_cluster(
+        project_id=project_id, region=region, cluster_name=cluster_name
+    )
+
+    # Build an update mask naming the field to change.
+    mask = {"paths": {"config.worker_config.num_instances": str(new_num_instances)}}
+
+    # Set the new number of worker instances on the cluster config.
+    cluster.config.worker_config.num_instances = new_num_instances
+
+    # Send the update request.
+    operation = client.update_cluster(
+        project_id=project_id,
+        region=region,
+        cluster=cluster,
+        cluster_name=cluster_name,
+        update_mask=mask,
+    )
+
+    # Output a success message.
+    updated_cluster = operation.result()
+    print(f"Cluster was updated successfully: {updated_cluster.cluster_name}")
+
+
+# [END dataproc_update_cluster]
+
+
+if __name__ == "__main__":
+    if len(sys.argv) < 5:
+        sys.exit("python update_cluster.py project_id region cluster_name new_num_instances")
+
+    project_id = sys.argv[1]
+    region = sys.argv[2]
+    cluster_name = sys.argv[3]
+    new_num_instances = int(sys.argv[4])
+    update_cluster(project_id, region, cluster_name, new_num_instances)
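Note: the sample passes the FieldMask paths as a dict, which the client accepts because iterating a dict yields its keys, but it reads oddly. An equivalent sketch with paths as a plain list, sending only the field being changed (not part of this commit; the function name is illustrative):

from google.cloud import dataproc_v1 as dataproc


def resize_workers(project_id, region, cluster_name, new_num_instances):
    client = dataproc.ClusterControllerClient(
        client_options={"api_endpoint": f"{region}-dataproc.googleapis.com:443"}
    )
    # Only fields named in the mask are applied, so a partial Cluster
    # message is sufficient here.
    cluster = dataproc.Cluster(
        config=dataproc.ClusterConfig(
            worker_config=dataproc.InstanceGroupConfig(num_instances=new_num_instances)
        )
    )
    operation = client.update_cluster(
        project_id=project_id,
        region=region,
        cluster_name=cluster_name,
        cluster=cluster,
        update_mask={"paths": ["config.worker_config.num_instances"]},
    )
    return operation.result()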
dataproc/snippets/update_cluster_test.py

Lines changed: 80 additions & 0 deletions

@@ -0,0 +1,80 @@
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This test verifies the update_cluster sample, which resizes the number of
+# worker instances in a Cloud Dataproc cluster using the Dataproc client library.
+
+
+import os
+import uuid
+
+from google.cloud.dataproc_v1.services.cluster_controller.client import (
+    ClusterControllerClient,
+)
+import pytest
+
+import update_cluster
+
+
+PROJECT_ID = os.environ["GOOGLE_CLOUD_PROJECT"]
+REGION = "us-central1"
+CLUSTER_NAME = f"py-cc-test-{str(uuid.uuid4())}"
+NEW_NUM_INSTANCES = 5
+CLUSTER = {
+    "project_id": PROJECT_ID,
+    "cluster_name": CLUSTER_NAME,
+    "config": {
+        "master_config": {"num_instances": 1, "machine_type_uri": "n1-standard-2"},
+        "worker_config": {"num_instances": 2, "machine_type_uri": "n1-standard-2"},
+    },
+}
+
+
+@pytest.fixture(autouse=True)
+def setup_teardown(cluster_client):
+    # Create the cluster.
+    operation = cluster_client.create_cluster(
+        request={"project_id": PROJECT_ID, "region": REGION, "cluster": CLUSTER}
+    )
+    operation.result()
+
+    yield
+
+    cluster_client.delete_cluster(
+        request={
+            "project_id": PROJECT_ID,
+            "region": REGION,
+            "cluster_name": CLUSTER_NAME,
+        }
+    )
+
+
+@pytest.fixture
+def cluster_client():
+    cluster_client = ClusterControllerClient(
+        client_options={"api_endpoint": "{}-dataproc.googleapis.com:443".format(REGION)}
+    )
+    return cluster_client
+
+
+def test_update_cluster(capsys, cluster_client: ClusterControllerClient):
+    # Call the sample, then re-fetch the cluster to verify the resize.
+    update_cluster.update_cluster(PROJECT_ID, REGION, CLUSTER_NAME, NEW_NUM_INSTANCES)
+    new_num_cluster = cluster_client.get_cluster(
+        project_id=PROJECT_ID, region=REGION, cluster_name=CLUSTER_NAME
+    )
+
+    out, _ = capsys.readouterr()
+    assert CLUSTER_NAME in out
+    assert new_num_cluster.config.worker_config.num_instances == NEW_NUM_INSTANCES
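Note: the test relies on pytest discovery. For quick local iteration it can also be launched directly (a sketch; assumes GOOGLE_CLOUD_PROJECT is exported and Application Default Credentials are configured):

import sys

import pytest

if __name__ == "__main__":
    # Run only this module, verbosely; propagate pytest's exit status.
    sys.exit(pytest.main(["-v", "update_cluster_test.py"]))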
