Skip to content
This repository was archived by the owner on Feb 23, 2024. It is now read-only.

Commit ba8eaff

Browse files
sirtorrybusunkim96
authored and committed
* automl initial commit * lint * fix import groupings * add requirements.txt * address review comments
1 parent 5b8929b commit ba8eaff

8 files changed

+867
-0
lines changed
Lines changed: 278 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,278 @@
1+
#!/usr/bin/env python
2+
3+
# Copyright 2018 Google LLC
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
17+
"""This application demonstrates how to perform basic operations on dataset
18+
with the Google AutoML Translation API.
19+
20+
For more information, see the documentation at
21+
https://cloud.google.com/translate/automl/docs
22+
"""
23+
24+
import argparse
25+
import os
26+
27+
28+
def create_dataset(project_id, compute_region, dataset_name, source, target):
    """Create a dataset."""
    # [START automl_translation_create_dataset]
    # TODO(developer): Uncomment and set the following variables
    # project_id = 'PROJECT_ID_HERE'
    # compute_region = 'COMPUTE_REGION_HERE'
    # dataset_name = 'DATASET_NAME_HERE'
    # source = 'LANGUAGE_CODE_OF_SOURCE_LANGUAGE'
    # target = 'LANGUAGE_CODE_OF_TARGET_LANGUAGE'

    from google.cloud import automl_v1beta1 as automl

    client = automl.AutoMlClient()

    # Resource path of the GCP location that will own the dataset.
    parent = client.location_path(project_id, compute_region)

    # Describe the new dataset: display name plus translation metadata
    # carrying the source/target language pair.
    dataset_spec = {
        "display_name": dataset_name,
        "translation_dataset_metadata": {
            "source_language_code": source,
            "target_language_code": target,
        },
    }

    # Create the dataset in the chosen region.
    dataset = client.create_dataset(parent, dataset_spec)

    # Report details of the newly created dataset.
    metadata = dataset.translation_dataset_metadata
    print("Dataset name: {}".format(dataset.name))
    print("Dataset id: {}".format(dataset.name.split("/")[-1]))
    print("Dataset display name: {}".format(dataset.display_name))
    print("Translation dataset Metadata:")
    print("\tsource_language_code: {}".format(metadata.source_language_code))
    print("\ttarget_language_code: {}".format(metadata.target_language_code))
    print("Dataset create time:")
    print("\tseconds: {}".format(dataset.create_time.seconds))
    print("\tnanos: {}".format(dataset.create_time.nanos))

    # [END automl_translation_create_dataset]
79+
80+
81+
def list_datasets(project_id, compute_region, filter_):
    """List Datasets."""
    # [START automl_translation_list_datasets]
    # TODO(developer): Uncomment and set the following variables
    # project_id = 'PROJECT_ID_HERE'
    # compute_region = 'COMPUTE_REGION_HERE'
    # filter_ = 'filter expression here'

    from google.cloud import automl_v1beta1 as automl

    client = automl.AutoMlClient()

    # Resource path of the GCP location whose datasets are listed.
    parent = client.location_path(project_id, compute_region)

    # Retrieve every dataset in the region that matches the filter.
    response = client.list_datasets(parent, filter_)

    print("List of datasets:")
    for dataset in response:
        # Print the details of each dataset in turn.
        metadata = dataset.translation_dataset_metadata
        print("Dataset name: {}".format(dataset.name))
        print("Dataset id: {}".format(dataset.name.split("/")[-1]))
        print("Dataset display name: {}".format(dataset.display_name))
        print("Translation dataset metadata:")
        print(
            "\tsource_language_code: {}".format(metadata.source_language_code)
        )
        print(
            "\ttarget_language_code: {}".format(metadata.target_language_code)
        )
        print("Dataset create time:")
        print("\tseconds: {}".format(dataset.create_time.seconds))
        print("\tnanos: {}".format(dataset.create_time.nanos))

    # [END automl_translation_list_datasets]
121+
122+
123+
def get_dataset(project_id, compute_region, dataset_id):
    """Get the dataset."""
    # [START automl_translation_get_dataset]
    # TODO(developer): Uncomment and set the following variables
    # project_id = 'PROJECT_ID_HERE'
    # compute_region = 'COMPUTE_REGION_HERE'
    # dataset_id = 'DATASET_ID_HERE'

    from google.cloud import automl_v1beta1 as automl

    client = automl.AutoMlClient()

    # Build the fully qualified resource name of the dataset.
    dataset_full_id = client.dataset_path(
        project_id, compute_region, dataset_id
    )

    # Fetch the complete dataset record.
    dataset = client.get_dataset(dataset_full_id)

    # Print the dataset details.
    metadata = dataset.translation_dataset_metadata
    print("Dataset name: {}".format(dataset.name))
    print("Dataset id: {}".format(dataset.name.split("/")[-1]))
    print("Dataset display name: {}".format(dataset.display_name))
    print("Translation dataset metadata:")
    print("\tsource_language_code: {}".format(metadata.source_language_code))
    print("\ttarget_language_code: {}".format(metadata.target_language_code))
    print("Dataset create time:")
    print("\tseconds: {}".format(dataset.create_time.seconds))
    print("\tnanos: {}".format(dataset.create_time.nanos))

    # [END automl_translation_get_dataset]
163+
164+
165+
def import_data(project_id, compute_region, dataset_id, path):
    """Import sentence pairs to the dataset."""
    # [START automl_translation_import_data]
    # TODO(developer): Uncomment and set the following variables
    # project_id = 'PROJECT_ID_HERE'
    # compute_region = 'COMPUTE_REGION_HERE'
    # dataset_id = 'DATASET_ID_HERE'
    # path = 'gs://path/to/file.csv'

    from google.cloud import automl_v1beta1 as automl

    client = automl.AutoMlClient()

    # Build the fully qualified resource name of the target dataset.
    dataset_full_id = client.dataset_path(
        project_id, compute_region, dataset_id
    )

    # `path` may hold several comma-separated Google Cloud Storage URIs.
    gcs_source = {"input_uris": path.split(",")}
    input_config = {"gcs_source": gcs_source}

    # Start the import; the returned operation is awaited below.
    response = client.import_data(dataset_full_id, input_config)

    print("Processing import...")
    # synchronous check of operation status
    print("Data imported. {}".format(response.result()))

    # [END automl_translation_import_data]
195+
196+
197+
def delete_dataset(project_id, compute_region, dataset_id):
    """Delete a dataset."""
    # Fix: the START tag below had a stray extra ']' in the original
    # ("...delete_dataset]]"), which breaks the START/END region-tag pairing
    # used by the documentation sample-extraction tooling.
    # [START automl_translation_delete_dataset]
    # TODO(developer): Uncomment and set the following variables
    # project_id = 'PROJECT_ID_HERE'
    # compute_region = 'COMPUTE_REGION_HERE'
    # dataset_id = 'DATASET_ID_HERE'

    from google.cloud import automl_v1beta1 as automl

    client = automl.AutoMlClient()

    # Get the full path of the dataset.
    dataset_full_id = client.dataset_path(
        project_id, compute_region, dataset_id
    )

    # Delete a dataset; this returns a long-running operation.
    response = client.delete_dataset(dataset_full_id)

    # synchronous check of operation status
    print("Dataset deleted. {}".format(response.result()))

    # [END automl_translation_delete_dataset]
221+
222+
223+
if __name__ == "__main__":
    # Build the command-line interface: one sub-command per operation,
    # each documented by the corresponding function's docstring.
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    subparsers = parser.add_subparsers(dest="command")

    create_dataset_parser = subparsers.add_parser(
        "create_dataset", help=create_dataset.__doc__
    )
    create_dataset_parser.add_argument("dataset_name")
    create_dataset_parser.add_argument("source")
    create_dataset_parser.add_argument("target")

    list_datasets_parser = subparsers.add_parser(
        "list_datasets", help=list_datasets.__doc__
    )
    list_datasets_parser.add_argument("filter", nargs="?", default="")

    import_data_parser = subparsers.add_parser(
        "import_data", help=import_data.__doc__
    )
    import_data_parser.add_argument("dataset_id")
    import_data_parser.add_argument("path")

    delete_dataset_parser = subparsers.add_parser(
        "delete_dataset", help=delete_dataset.__doc__
    )
    delete_dataset_parser.add_argument("dataset_id")

    get_dataset_parser = subparsers.add_parser(
        "get_dataset", help=get_dataset.__doc__
    )
    get_dataset_parser.add_argument("dataset_id")

    # Project and region come from the environment, not the command line.
    project_id = os.environ["PROJECT_ID"]
    compute_region = os.environ["REGION_NAME"]

    args = parser.parse_args()

    # Dispatch to the selected operation (commands are mutually exclusive,
    # so an elif chain is equivalent to the original independent ifs).
    if args.command == "create_dataset":
        create_dataset(
            project_id,
            compute_region,
            args.dataset_name,
            args.source,
            args.target,
        )
    elif args.command == "list_datasets":
        list_datasets(project_id, compute_region, args.filter)
    elif args.command == "get_dataset":
        get_dataset(project_id, compute_region, args.dataset_id)
    elif args.command == "import_data":
        import_data(project_id, compute_region, args.dataset_id, args.path)
    elif args.command == "delete_dataset":
        delete_dataset(project_id, compute_region, args.dataset_id)

0 commit comments

Comments
 (0)