
Commit a918a34

automl beta (#1575)

* automl initial commit
* lint
* fix import groupings
* add requirements.txt
* address review comments

1 parent c310941

24 files changed: +2836 -0 lines changed
Lines changed: 297 additions & 0 deletions
@@ -0,0 +1,297 @@
#!/usr/bin/env python

# Copyright 2018 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""This application demonstrates how to perform basic operations on datasets
with the Google AutoML Natural Language API.

For more information, see the tutorial page at
https://cloud.google.com/natural-language/automl/docs/
"""

import argparse
import os
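
# Example invocations (a sketch; the filename is hypothetical, and PROJECT_ID
# and REGION_NAME must be exported in the environment, as read in __main__
# below):
#
#   python automl_natural_language_dataset.py create_dataset my_dataset True
#   python automl_natural_language_dataset.py list_datasets
#   python automl_natural_language_dataset.py delete_dataset <dataset_id>
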
def create_dataset(project_id, compute_region, dataset_name, multilabel=False):
    """Create a dataset."""
    # [START automl_natural_language_create_dataset]
    # TODO(developer): Uncomment and set the following variables
    # project_id = 'PROJECT_ID_HERE'
    # compute_region = 'COMPUTE_REGION_HERE'
    # dataset_name = 'DATASET_NAME_HERE'
    # multilabel = True for multilabel or False for multiclass

    from google.cloud import automl_v1beta1 as automl

    client = automl.AutoMlClient()

    # A resource that represents Google Cloud Platform location.
    project_location = client.location_path(project_id, compute_region)

    # Classification type is assigned based on multilabel value.
    classification_type = "MULTICLASS"
    if multilabel:
        classification_type = "MULTILABEL"

    # Specify the text classification type for the dataset.
    dataset_metadata = {"classification_type": classification_type}

    # Set dataset name and metadata.
    my_dataset = {
        "display_name": dataset_name,
        "text_classification_dataset_metadata": dataset_metadata,
    }

    # Create a dataset with the dataset metadata in the region.
    dataset = client.create_dataset(project_location, my_dataset)

    # Display the dataset information.
    print("Dataset name: {}".format(dataset.name))
    print("Dataset id: {}".format(dataset.name.split("/")[-1]))
    print("Dataset display name: {}".format(dataset.display_name))
    print("Text classification dataset metadata:")
    print("\t{}".format(dataset.text_classification_dataset_metadata))
    print("Dataset example count: {}".format(dataset.example_count))
    print("Dataset create time:")
    print("\tseconds: {}".format(dataset.create_time.seconds))
    print("\tnanos: {}".format(dataset.create_time.nanos))

    # [END automl_natural_language_create_dataset]
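
# A minimal programmatic sketch (all values are assumed placeholders;
# "us-central1" stands in for whatever region the project uses):
#
#   create_dataset("my-project", "us-central1", "my_dataset", multilabel=True)
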
def list_datasets(project_id, compute_region, filter_):
    """List all datasets."""
    # [START automl_natural_language_list_datasets]
    # TODO(developer): Uncomment and set the following variables
    # project_id = 'PROJECT_ID_HERE'
    # compute_region = 'COMPUTE_REGION_HERE'
    # filter_ = 'filter expression here'

    from google.cloud import automl_v1beta1 as automl

    client = automl.AutoMlClient()

    # A resource that represents Google Cloud Platform location.
    project_location = client.location_path(project_id, compute_region)

    # List all the datasets available in the region by applying filter.
    response = client.list_datasets(project_location, filter_)

    print("List of datasets:")
    for dataset in response:
        # Display the dataset information.
        print("Dataset name: {}".format(dataset.name))
        print("Dataset id: {}".format(dataset.name.split("/")[-1]))
        print("Dataset display name: {}".format(dataset.display_name))
        print("Text classification dataset metadata:")
        print("\t{}".format(dataset.text_classification_dataset_metadata))
        print("Dataset example count: {}".format(dataset.example_count))
        print("Dataset create time:")
        print("\tseconds: {}".format(dataset.create_time.seconds))
        print("\tnanos: {}".format(dataset.create_time.nanos))

    # [END automl_natural_language_list_datasets]
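
# For example (a sketch; this default filter, also used by the CLI below,
# restricts results to text-classification datasets):
#
#   list_datasets("my-project", "us-central1",
#                 "text_classification_dataset_metadata:*")
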
def get_dataset(project_id, compute_region, dataset_id):
    """Get the dataset."""
    # [START automl_natural_language_get_dataset]
    # TODO(developer): Uncomment and set the following variables
    # project_id = 'PROJECT_ID_HERE'
    # compute_region = 'COMPUTE_REGION_HERE'
    # dataset_id = 'DATASET_ID_HERE'

    from google.cloud import automl_v1beta1 as automl

    client = automl.AutoMlClient()

    # Get the full path of the dataset.
    dataset_full_id = client.dataset_path(
        project_id, compute_region, dataset_id
    )

    # Get complete detail of the dataset.
    dataset = client.get_dataset(dataset_full_id)

    # Display the dataset information.
    print("Dataset name: {}".format(dataset.name))
    print("Dataset id: {}".format(dataset.name.split("/")[-1]))
    print("Dataset display name: {}".format(dataset.display_name))
    print("Text classification dataset metadata:")
    print("\t{}".format(dataset.text_classification_dataset_metadata))
    print("Dataset example count: {}".format(dataset.example_count))
    print("Dataset create time:")
    print("\tseconds: {}".format(dataset.create_time.seconds))
    print("\tnanos: {}".format(dataset.create_time.nanos))

    # [END automl_natural_language_get_dataset]
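
# Note: the dataset id passed here is the trailing component of the full
# resource name, projects/<project>/locations/<region>/datasets/<dataset_id>,
# which is what the "Dataset id" print above extracts with split("/")[-1].
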
def import_data(project_id, compute_region, dataset_id, path):
    """Import labelled items."""
    # [START automl_natural_language_import_data]
    # TODO(developer): Uncomment and set the following variables
    # project_id = 'PROJECT_ID_HERE'
    # compute_region = 'COMPUTE_REGION_HERE'
    # dataset_id = 'DATASET_ID_HERE'
    # path = 'gs://path/to/file.csv'

    from google.cloud import automl_v1beta1 as automl

    client = automl.AutoMlClient()

    # Get the full path of the dataset.
    dataset_full_id = client.dataset_path(
        project_id, compute_region, dataset_id
    )

    # Get the multiple Google Cloud Storage URIs.
    input_uris = path.split(",")
    input_config = {"gcs_source": {"input_uris": input_uris}}

    # Import the dataset from the input URI.
    response = client.import_data(dataset_full_id, input_config)

    print("Processing import...")
    # synchronous check of operation status.
    print("Data imported. {}".format(response.result()))

    # [END automl_natural_language_import_data]
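
# Example (a sketch; `path` may be a single GCS URI or a comma-separated
# list, since import_data splits on "," above; the bucket and dataset id
# are hypothetical):
#
#   import_data("my-project", "us-central1", "123456789",
#               "gs://my-bucket/train.csv,gs://my-bucket/eval.csv")
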
def export_data(project_id, compute_region, dataset_id, output_uri):
    """Export a dataset to a Google Cloud Storage bucket."""
    # [START automl_natural_language_export_data]
    # TODO(developer): Uncomment and set the following variables
    # project_id = 'PROJECT_ID_HERE'
    # compute_region = 'COMPUTE_REGION_HERE'
    # dataset_id = 'DATASET_ID_HERE'
    # output_uri = 'gs://location/to/export/data'

    from google.cloud import automl_v1beta1 as automl

    client = automl.AutoMlClient()

    # Get the full path of the dataset.
    dataset_full_id = client.dataset_path(
        project_id, compute_region, dataset_id
    )

    # Set the output URI.
    output_config = {"gcs_destination": {"output_uri_prefix": output_uri}}

    # Export the data to the output URI.
    response = client.export_data(dataset_full_id, output_config)

    print("Processing export...")
    # synchronous check of operation status.
    print("Data exported. {}".format(response.result()))

    # [END automl_natural_language_export_data]
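
# Note: the URI is passed as an `output_uri_prefix`, so the export is written
# under that prefix rather than to a single object. Like import_data, this
# call returns a long-running operation; response.result() above blocks until
# the operation completes.
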
def delete_dataset(project_id, compute_region, dataset_id):
    """Delete a dataset."""
    # [START automl_natural_language_delete_dataset]
    # TODO(developer): Uncomment and set the following variables
    # project_id = 'PROJECT_ID_HERE'
    # compute_region = 'COMPUTE_REGION_HERE'
    # dataset_id = 'DATASET_ID_HERE'

    from google.cloud import automl_v1beta1 as automl

    client = automl.AutoMlClient()

    # Get the full path of the dataset.
    dataset_full_id = client.dataset_path(
        project_id, compute_region, dataset_id
    )

    # Delete a dataset.
    response = client.delete_dataset(dataset_full_id)

    # synchronous check of operation status.
    print("Dataset deleted. {}".format(response.result()))

    # [END automl_natural_language_delete_dataset]

if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    subparsers = parser.add_subparsers(dest="command")

    create_dataset_parser = subparsers.add_parser(
        "create_dataset", help=create_dataset.__doc__
    )
    create_dataset_parser.add_argument("dataset_name")
    create_dataset_parser.add_argument(
        "multilabel", nargs="?", choices=["False", "True"], default="False"
    )

    list_datasets_parser = subparsers.add_parser(
        "list_datasets", help=list_datasets.__doc__
    )
    list_datasets_parser.add_argument(
        "filter_", nargs="?", default="text_classification_dataset_metadata:*"
    )

    get_dataset_parser = subparsers.add_parser(
        "get_dataset", help=get_dataset.__doc__
    )
    get_dataset_parser.add_argument("dataset_id")

    import_data_parser = subparsers.add_parser(
        "import_data", help=import_data.__doc__
    )
    import_data_parser.add_argument("dataset_id")
    import_data_parser.add_argument("path")

    export_data_parser = subparsers.add_parser(
        "export_data", help=export_data.__doc__
    )
    export_data_parser.add_argument("dataset_id")
    export_data_parser.add_argument("output_uri")

    delete_dataset_parser = subparsers.add_parser(
        "delete_dataset", help=delete_dataset.__doc__
    )
    delete_dataset_parser.add_argument("dataset_id")
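
    # The project and region come from the environment rather than from
    # flags, e.g. (hypothetical values):
    #
    #   export PROJECT_ID="my-project"
    #   export REGION_NAME="us-central1"
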
    project_id = os.environ["PROJECT_ID"]
    compute_region = os.environ["REGION_NAME"]

    args = parser.parse_args()

    if args.command == "create_dataset":
        multilabel = True if args.multilabel == "True" else False
        create_dataset(
            project_id, compute_region, args.dataset_name, multilabel
        )
    if args.command == "list_datasets":
        list_datasets(project_id, compute_region, args.filter_)
    if args.command == "get_dataset":
        get_dataset(project_id, compute_region, args.dataset_id)
    if args.command == "import_data":
        import_data(project_id, compute_region, args.dataset_id, args.path)
    if args.command == "export_data":
        export_data(
            project_id, compute_region, args.dataset_id, args.output_uri
        )
    if args.command == "delete_dataset":
        delete_dataset(project_id, compute_region, args.dataset_id)
