Skip to content

Commit faaa7d1

Browse files
nnegrey authored and busunkim96 committed
automl: add base dataset samples for automl ga [(#2608)](#2608)
* automl: add base dataset samples for automl ga
* Use a unique prefix
* Move test imports to top / misc feedback cleanup
* Update tests
* Use centralized testing project for automl resources
* Test fix
* Consistently use double quotes
* License year 2020
* Use a fake dataset to fix the flaky test as only one operation can work at a time
* use centralized automl testing project
* use different automl product to import data
1 parent a2bc66e commit faaa7d1

10 files changed

+516
-0
lines changed

automl/snippets/delete_dataset.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
# Copyright 2020 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
16+
def delete_dataset(project_id, dataset_id):
    """Delete a dataset and print the result of the delete operation.

    Args:
        project_id: Project ID used to build the dataset resource path.
        dataset_id: Dataset ID used to build the dataset resource path.
    """
    # [START automl_delete_dataset]
    from google.cloud import automl

    # TODO(developer): Uncomment and set the following variables
    # project_id = "YOUR_PROJECT_ID"
    # dataset_id = "YOUR_DATASET_ID"

    automl_client = automl.AutoMlClient()
    # Fully qualified name of the dataset; the location is fixed to us-central1.
    dataset_name = automl_client.dataset_path(
        project_id, "us-central1", dataset_id
    )
    # Request the deletion; the printed value is the operation's result.
    operation = automl_client.delete_dataset(dataset_name)

    print("Dataset deleted. {}".format(operation.result()))
    # [END automl_delete_dataset]
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
# Copyright 2020 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import datetime
16+
import os
17+
18+
from google.cloud import automl
19+
import pytest
20+
21+
import delete_dataset
22+
23+
# Project ID used for the AutoML calls in this test module, read from the
# environment.
PROJECT_ID = os.environ["AUTOML_PROJECT_ID"]
# Identifier derived from the project ID; not referenced by the code visible
# in this module.
BUCKET_ID = PROJECT_ID + "-lcm"
25+
26+
27+
@pytest.fixture(scope="function")
def create_dataset():
    """Create a text-extraction dataset and yield its ID.

    The fixture performs no teardown; the dataset is left for the test to
    remove.
    """
    automl_client = automl.AutoMlClient()
    parent = automl_client.location_path(PROJECT_ID, "us-central1")
    # A timestamp suffix keeps the display name distinct between runs.
    timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
    extraction_metadata = automl.types.TextExtractionDatasetMetadata()
    new_dataset = automl.types.Dataset(
        display_name="test_" + timestamp,
        text_extraction_dataset_metadata=extraction_metadata,
    )
    operation = automl_client.create_dataset(parent, new_dataset)
    created = operation.result()

    # The dataset ID is the final segment of the resource name.
    yield created.name.split("/")[-1]
40+
41+
42+
def test_delete_dataset(capsys, create_dataset):
    """Run the delete sample on the fixture's dataset and check its output."""
    # The fixture value is the ID of the dataset to delete.
    dataset_id = create_dataset
    delete_dataset.delete_dataset(PROJECT_ID, dataset_id)
    stdout, _ = capsys.readouterr()
    assert "Dataset deleted." in stdout

automl/snippets/export_dataset.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
# Copyright 2020 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
16+
def export_dataset(project_id, dataset_id, gcs_uri):
    """Export a dataset to Cloud Storage and print the operation result.

    Args:
        project_id: Project ID used to build the dataset resource path.
        dataset_id: Dataset ID used to build the dataset resource path.
        gcs_uri: Output URI prefix for the export, e.g.
            "gs://YOUR_BUCKET_ID/path/to/export/".
    """
    # [START automl_export_dataset]
    from google.cloud import automl

    # TODO(developer): Uncomment and set the following variables
    # project_id = "YOUR_PROJECT_ID"
    # dataset_id = "YOUR_DATASET_ID"
    # gcs_uri = "gs://YOUR_BUCKET_ID/path/to/export/"

    automl_client = automl.AutoMlClient()

    # Fully qualified name of the dataset; the location is fixed to us-central1.
    dataset_name = automl_client.dataset_path(
        project_id, "us-central1", dataset_id
    )

    # Wrap the destination URI prefix in the output configuration.
    destination = automl.types.GcsDestination(output_uri_prefix=gcs_uri)
    export_config = automl.types.OutputConfig(gcs_destination=destination)

    operation = automl_client.export_data(dataset_name, export_config)
    print("Dataset exported. {}".format(operation.result()))
    # [END automl_export_dataset]
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
# Copyright 2020 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import datetime
16+
import os
17+
18+
import export_dataset
19+
20+
# Project ID used for the AutoML calls in this test module, read from the
# environment.
PROJECT_ID = os.environ["AUTOML_PROJECT_ID"]
# Bucket name derived from the project ID; used to build the export URI.
BUCKET_ID = PROJECT_ID + "-lcm"
# Timestamped output prefix, computed once when the module is imported.
PREFIX = "TEST_EXPORT_OUTPUT_{}".format(
    datetime.datetime.now().strftime("%Y%m%d%H%M%S")
)
# Placeholder dataset ID handed to the export sample by the test.
DATASET_ID = "TEN0000000000000000000"
26+
27+
28+
def test_export_dataset(capsys):
    """Check that exporting a nonexistent dataset reports "not found".

    The "dataset doesn't exist" message is accepted either in the sample's
    printed output or in the message of the exception it raises.
    """
    # Exporting a dataset can take a long time, and only one operation can be
    # run on a dataset at once. Try to export a nonexistent dataset and confirm
    # that the dataset was not found, but other elements of the request were
    # valid.
    not_found = (
        "The Dataset doesn't exist or is inaccessible for use with AutoMl."
    )
    try:
        export_dataset.export_dataset(
            PROJECT_ID, DATASET_ID, "gs://{}/{}/".format(BUCKET_ID, PREFIX)
        )
    except Exception as e:
        # Not every exception type exposes a ``message`` attribute, so fall
        # back to the string form instead of raising AttributeError here.
        assert not_found in (getattr(e, "message", None) or str(e))
    else:
        # Kept outside the ``try`` so a failing assertion is reported as-is
        # rather than being caught by the handler above.
        out, _ = capsys.readouterr()
        assert not_found in out

automl/snippets/get_dataset.py

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
# Copyright 2020 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
16+
def get_dataset(project_id, dataset_id):
    """Fetch a dataset and print its details.

    Prints the dataset name, ID, display name and create time, followed by
    the metadata field of each dataset type referenced below.

    Args:
        project_id: Project ID used to build the dataset resource path.
        dataset_id: Dataset ID used to build the dataset resource path.
    """
    # [START automl_language_entity_extraction_get_dataset]
    # [START automl_language_sentiment_analysis_get_dataset]
    # [START automl_language_text_classification_get_dataset]
    # [START automl_translate_get_dataset]
    # [START automl_vision_classification_get_dataset]
    # [START automl_vision_object_detection_get_dataset]
    from google.cloud import automl

    # TODO(developer): Uncomment and set the following variables
    # project_id = "YOUR_PROJECT_ID"
    # dataset_id = "YOUR_DATASET_ID"

    automl_client = automl.AutoMlClient()
    # Fully qualified name of the dataset; the location is fixed to us-central1.
    dataset_name = automl_client.dataset_path(
        project_id, "us-central1", dataset_id
    )
    dataset = automl_client.get_dataset(dataset_name)

    # Print the general dataset fields.
    print("Dataset name: {}".format(dataset.name))
    # The ID is the final segment of the resource name.
    print("Dataset id: {}".format(dataset.name.rsplit("/", 1)[-1]))
    print("Dataset display name: {}".format(dataset.display_name))
    created = dataset.create_time
    print("Dataset create time:")
    print("\tseconds: {}".format(created.seconds))
    print("\tnanos: {}".format(created.nanos))
    # [END automl_language_sentiment_analysis_get_dataset]
    # [END automl_language_text_classification_get_dataset]
    # [END automl_translate_get_dataset]
    # [END automl_vision_classification_get_dataset]
    # [END automl_vision_object_detection_get_dataset]
    extraction_line = "Text extraction dataset metadata: {}".format(
        dataset.text_extraction_dataset_metadata
    )
    print(extraction_line)
    # [END automl_language_entity_extraction_get_dataset]

    # [START automl_language_sentiment_analysis_get_dataset]
    sentiment_line = "Text sentiment dataset metadata: {}".format(
        dataset.text_sentiment_dataset_metadata
    )
    print(sentiment_line)
    # [END automl_language_sentiment_analysis_get_dataset]

    # [START automl_language_text_classification_get_dataset]
    classification_line = "Text classification dataset metadata: {}".format(
        dataset.text_classification_dataset_metadata
    )
    print(classification_line)
    # [END automl_language_text_classification_get_dataset]

    # [START automl_translate_get_dataset]
    translation = dataset.translation_dataset_metadata
    print("Translation dataset metadata:")
    print("\tsource_language_code: {}".format(translation.source_language_code))
    print("\ttarget_language_code: {}".format(translation.target_language_code))
    # [END automl_translate_get_dataset]

    # [START automl_vision_classification_get_dataset]
    image_classification_line = (
        "Image classification dataset metadata: {}".format(
            dataset.image_classification_dataset_metadata
        )
    )
    print(image_classification_line)
    # [END automl_vision_classification_get_dataset]

    # [START automl_vision_object_detection_get_dataset]
    object_detection_line = (
        "Image object detection dataset metadata: {}".format(
            dataset.image_object_detection_dataset_metadata
        )
    )
    print(object_detection_line)
    # [END automl_vision_object_detection_get_dataset]

automl/snippets/get_dataset_test.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
# Copyright 2020 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import os
16+
17+
import get_dataset
18+
19+
# Both identifiers are read from the environment and raise KeyError when the
# variable is unset.
PROJECT_ID = os.environ["AUTOML_PROJECT_ID"]
# ID of the dataset that the test passes to the get_dataset sample.
DATASET_ID = os.environ["ENTITY_EXTRACTION_DATASET_ID"]
21+
22+
23+
def test_get_dataset(capsys):
    """Run the get sample and check that it prints the dataset name line."""
    get_dataset.get_dataset(PROJECT_ID, DATASET_ID)
    stdout, _ = capsys.readouterr()
    assert "Dataset name: " in stdout

automl/snippets/import_dataset.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
# Copyright 2020 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
16+
def import_dataset(project_id, dataset_id, path):
    """Import data from Cloud Storage into a dataset and print the result.

    Args:
        project_id: Project ID used to build the dataset resource path.
        dataset_id: Dataset ID used to build the dataset resource path.
        path: One or more Cloud Storage URIs separated by commas, e.g.
            "gs://YOUR_BUCKET_ID/path/to/data.csv".
    """
    # [START automl_import_data]
    from google.cloud import automl

    # TODO(developer): Uncomment and set the following variables
    # project_id = "YOUR_PROJECT_ID"
    # dataset_id = "YOUR_DATASET_ID"
    # path = "gs://YOUR_BUCKET_ID/path/to/data.csv"

    automl_client = automl.AutoMlClient()
    # Fully qualified name of the dataset; the location is fixed to us-central1.
    dataset_name = automl_client.dataset_path(
        project_id, "us-central1", dataset_id
    )
    # A comma-separated ``path`` yields several input URIs.
    source = automl.types.GcsSource(input_uris=path.split(","))
    import_config = automl.types.InputConfig(gcs_source=source)
    # Start the import from the input URIs.
    operation = automl_client.import_data(dataset_name, import_config)

    print("Processing import...")
    print("Data imported. {}".format(operation.result()))
    # [END automl_import_data]
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
# Copyright 2020 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import datetime
16+
import os
17+
18+
from google.cloud import automl
19+
import pytest
20+
21+
import import_dataset
22+
23+
# Project ID used for the AutoML calls in this test module, read from the
# environment.
PROJECT_ID = os.environ["AUTOML_PROJECT_ID"]
# Bucket name derived from the project ID; used to build the import URI.
BUCKET_ID = PROJECT_ID + "-lcm"
25+
26+
27+
@pytest.fixture(scope="function")
def create_dataset():
    """Create a text-sentiment dataset and yield its ID.

    The fixture performs no teardown; the dataset is left for the test to
    remove.
    """
    automl_client = automl.AutoMlClient()
    parent = automl_client.location_path(PROJECT_ID, "us-central1")
    # A timestamp suffix keeps the display name distinct between runs.
    timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
    sentiment_metadata = automl.types.TextSentimentDatasetMetadata(
        sentiment_max=4
    )
    new_dataset = automl.types.Dataset(
        display_name="test_" + timestamp,
        text_sentiment_dataset_metadata=sentiment_metadata,
    )
    operation = automl_client.create_dataset(parent, new_dataset)
    created = operation.result()

    # The dataset ID is the final segment of the resource name.
    yield created.name.split("/")[-1]
42+
43+
44+
@pytest.mark.slow
def test_import_dataset(capsys, create_dataset):
    """Import the sentiment-analysis CSV into a fresh dataset.

    The dataset created by the ``create_dataset`` fixture is deleted at the
    end of the test, whether or not the import and the output check succeed.
    """
    data = "gs://{}/sentiment-analysis/dataset.csv".format(BUCKET_ID)
    dataset_id = create_dataset
    try:
        import_dataset.import_dataset(PROJECT_ID, dataset_id, data)
        out, _ = capsys.readouterr()
        assert "Data imported." in out
    finally:
        # Delete the created dataset in ``finally`` so a failed import or
        # assertion does not leave it behind in the project.
        client = automl.AutoMlClient()
        dataset_full_id = client.dataset_path(
            PROJECT_ID, "us-central1", dataset_id
        )
        response = client.delete_dataset(dataset_full_id)
        # Wait on the delete operation's result before the test returns.
        response.result()

0 commit comments

Comments
 (0)