Skip to content
This repository was archived by the owner on Dec 31, 2023. It is now read-only.

Commit 2d9ce30

Browse files
nnegrey authored and busunkim96 committed
automl: add natural language entity extraction ga samples [(#2676)](GoogleCloudPlatform/python-docs-samples#2676)
* automl: add natural language entity extraction ga samples * Update language_entity_extraction_predict_test.py * Update language_entity_extraction_predict_test.py * use centralized automl testing project and add comments that link to docs
1 parent b85df5e commit 2d9ce30

6 files changed

+262
-0
lines changed
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
# Copyright 2020 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
16+
def create_dataset(project_id, display_name):
    """Create a text extraction dataset and print its name and id.

    Args:
        project_id: Project id used to build the parent location path.
        display_name: Display name assigned to the new dataset.
    """
    # [START automl_language_entity_extraction_create_dataset]
    from google.cloud import automl

    # TODO(developer): Uncomment and set the following variables
    # project_id = "YOUR_PROJECT_ID"
    # display_name = "YOUR_DATASET_NAME"

    client = automl.AutoMlClient()

    # Parent resource (project + "us-central1" region) for the new dataset.
    parent = client.location_path(project_id, "us-central1")

    # Attaching text-extraction metadata selects the dataset type.
    extraction_metadata = automl.types.TextExtractionDatasetMetadata()
    dataset_spec = automl.types.Dataset(
        display_name=display_name,
        text_extraction_dataset_metadata=extraction_metadata,
    )

    # Submit the request; result() hands back the created dataset.
    operation = client.create_dataset(parent, dataset_spec)
    created = operation.result()

    # The dataset id is the final "/"-separated segment of the resource name.
    resource_name = created.name
    print("Dataset name: {}".format(resource_name))
    print("Dataset id: {}".format(resource_name.split("/")[-1]))
    # [END automl_language_entity_extraction_create_dataset]
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
# Copyright 2020 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import datetime
16+
import os
17+
18+
from google.cloud import automl
19+
20+
import language_entity_extraction_create_dataset
21+
22+
23+
# Project id for the test run, read from the AUTOML_PROJECT_ID environment
# variable; the lookup raises KeyError at import time if it is not set.
PROJECT_ID = os.environ["AUTOML_PROJECT_ID"]
24+
25+
26+
def test_entity_extraction_create_dataset(capsys):
    """Create a dataset through the sample, check its output, then delete it.

    Args:
        capsys: pytest fixture used to capture what the sample prints.
    """
    # create dataset (timestamp suffix keeps display names distinct per run)
    dataset_name = "test_" + datetime.datetime.now().strftime("%Y%m%d%H%M%S")
    language_entity_extraction_create_dataset.create_dataset(
        PROJECT_ID, dataset_name
    )
    out, _ = capsys.readouterr()
    id_label = "Dataset id: "
    assert id_label in out

    # Delete the created dataset.
    # Locate the id by its label instead of by a fixed line/token position, so
    # an extra line on stdout cannot make the test delete the wrong resource.
    dataset_id = next(
        line.split(id_label, 1)[1].strip()
        for line in out.splitlines()
        if id_label in line
    )
    client = automl.AutoMlClient()
    dataset_full_id = client.dataset_path(
        PROJECT_ID, "us-central1", dataset_id
    )
    response = client.delete_dataset(dataset_full_id)
    response.result()
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
# Copyright 2020 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
16+
def create_model(project_id, dataset_id, display_name):
    """Request training of a text extraction model and print the operation name.

    Args:
        project_id: Project id used to build the parent location path.
        dataset_id: Id of the dataset the model is created from.
        display_name: Display name assigned to the new model.
    """
    # [START automl_language_entity_extraction_create_model]
    from google.cloud import automl

    # TODO(developer): Uncomment and set the following variables
    # project_id = "YOUR_PROJECT_ID"
    # dataset_id = "YOUR_DATASET_ID"
    # display_name = "YOUR_MODEL_NAME"

    client = automl.AutoMlClient()

    # Parent resource (project + "us-central1" region) for the new model.
    parent = client.location_path(project_id, "us-central1")

    # Leave model unset to use the default base model provided by Google
    extraction_metadata = automl.types.TextExtractionModelMetadata()
    model_spec = automl.types.Model(
        display_name=display_name,
        dataset_id=dataset_id,
        text_extraction_model_metadata=extraction_metadata,
    )

    # Submit the request; the operation's result is not awaited here, only
    # its name is reported.
    training_operation = client.create_model(parent, model_spec)

    print(
        "Training operation name: {}".format(
            training_operation.operation.name
        )
    )
    print("Training started...")
    # [END automl_language_entity_extraction_create_model]
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
# Copyright 2020 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import os
16+
17+
import language_entity_extraction_create_model
18+
19+
# Project id for the test run, read from the AUTOML_PROJECT_ID environment
# variable; the lookup raises KeyError at import time if it is not set.
PROJECT_ID = os.environ["AUTOML_PROJECT_ID"]
20+
# Placeholder dataset id; the test in this file relies on it not matching any
# existing dataset.
DATASET_ID = "TEN0000000000000000000"
21+
22+
23+
def test_entity_extraction_create_model(capsys):
    """Check that creating a model from a nonexistent dataset is rejected.

    Args:
        capsys: pytest fixture used to capture what the sample prints.
    """
    # As entity extraction does not let you cancel model creation, instead try
    # to create a model from a nonexistent dataset, but other elements of the
    # request were valid.
    try:
        language_entity_extraction_create_model.create_model(
            PROJECT_ID, DATASET_ID, "classification_test_create_model"
        )
    except Exception as e:
        # Not every exception type has a ``message`` attribute on Python 3;
        # fall back to the string form so the real error is what gets checked.
        error_text = getattr(e, "message", None) or str(e)
        assert "Dataset does not exist." in error_text
    else:
        # Success path kept out of the ``try`` so that a failing assertion is
        # reported as itself rather than being caught by the handler above.
        out, _ = capsys.readouterr()
        assert "Dataset does not exist." in out
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
# Copyright 2020 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
16+
def predict(project_id, model_id, content):
    """Send text to a model and print each returned entity annotation.

    Args:
        project_id: Project id used to build the model path.
        model_id: Id of the model that receives the prediction request.
        content: Text to run prediction on; it is sent as "text/plain".
    """
    # [START automl_language_entity_extraction_predict]
    from google.cloud import automl

    # TODO(developer): Uncomment and set the following variables
    # project_id = "YOUR_PROJECT_ID"
    # model_id = "YOUR_MODEL_ID"
    # content = "text to predict"

    prediction_client = automl.PredictionServiceClient()

    # Get the full path of the model.
    model_full_id = prediction_client.model_path(
        project_id, "us-central1", model_id
    )

    # Supported mime_types: 'text/plain', 'text/html'
    # https://cloud.google.com/automl/docs/reference/rpc/google.cloud.automl.v1#textsnippet
    snippet = automl.types.TextSnippet(content=content, mime_type="text/plain")
    request_payload = automl.types.ExamplePayload(text_snippet=snippet)

    prediction = prediction_client.predict(model_full_id, request_payload)

    # One group of five output lines per annotation in the response.
    for annotation in prediction.payload:
        print("Text Extract Entity Types: {}".format(annotation.display_name))
        extraction = annotation.text_extraction
        print("Text Score: {}".format(extraction.score))
        segment = extraction.text_segment
        print("Text Extract Entity Content: {}".format(segment.content))
        print("Text Start Offset: {}".format(segment.start_offset))
        print("Text End Offset: {}".format(segment.end_offset))
    # [END automl_language_entity_extraction_predict]
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
# Copyright 2020 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import os
16+
17+
from google.cloud import automl
18+
import pytest
19+
20+
import language_entity_extraction_predict
21+
22+
# Project id for the test run, read from the AUTOML_PROJECT_ID environment
# variable; the lookup raises KeyError at import time if it is not set.
PROJECT_ID = os.environ["AUTOML_PROJECT_ID"]
23+
# Id of the model used for prediction, read from the
# ENTITY_EXTRACTION_MODEL_ID environment variable (KeyError if not set).
MODEL_ID = os.environ["ENTITY_EXTRACTION_MODEL_ID"]
24+
25+
26+
@pytest.fixture(scope="function")
def verify_model_state():
    """Deploy the configured model when it is reported as undeployed."""
    client = automl.AutoMlClient()
    model_full_id = client.model_path(PROJECT_ID, "us-central1", MODEL_ID)

    current_state = client.get_model(model_full_id).deployment_state
    if current_state != automl.enums.Model.DeploymentState.UNDEPLOYED:
        # Any state other than UNDEPLOYED is left untouched.
        return

    # Deploy model if it is not deployed
    deployment = client.deploy_model(model_full_id)
    deployment.result()
36+
37+
38+
def test_predict(capsys, verify_model_state):
    """Run the predict sample on a fixed sentence and check its output.

    Args:
        capsys: pytest fixture used to capture what the sample prints.
        verify_model_state: Fixture requested for its setup effect only.
            Listing it as a parameter is what makes pytest run it, so its
            value (None) is not referenced in the body.
    """
    text = (
        "Constitutional mutations in the WT1 gene in patients with "
        "Denys-Drash syndrome."
    )
    language_entity_extraction_predict.predict(PROJECT_ID, MODEL_ID, text)
    out, _ = capsys.readouterr()
    assert "Text Extract Entity Types: " in out

0 commit comments

Comments
 (0)