Skip to content

Commit 63d46e4

Browse files
authored
Translate: migrate published v3 translate batch samples (#2914)
* Translate: migrate published v3 batch samples * added missing requirements * extended wait time * inlined some vals and specified input and output * added link to supported file types & modified default values of input uri * fixed small nit
1 parent 570453c commit 63d46e4

7 files changed

+351
-0
lines changed

translate/automl/requirements.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,3 @@
1+
google-cloud-translate==2.0.0
2+
google-cloud-storage==1.19.1
13
google-cloud-automl==0.9.0
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
# Copyright 2020 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
16+
# [START translate_v3_batch_translate_text_with_model]
17+
from google.cloud import translate
18+
19+
20+
def batch_translate_text_with_model(
    input_uri="gs://YOUR_BUCKET_ID/path/to/your/file.txt",
    output_uri="gs://YOUR_BUCKET_ID/path/to/save/results/",
    project_id="YOUR_PROJECT_ID",
    model_id="YOUR_MODEL_ID",
    timeout=180,
):
    """Batch translate text using a Translation model.

    The model can be an AutoML model or the general (built-in) model.
    Prints the total and translated character counts reported by the
    finished operation.

    Args:
        input_uri: Cloud Storage URI of the file to translate.
        output_uri: Cloud Storage URI prefix the results are written under.
        project_id: Google Cloud project used for the request and for
            building the model resource path.
        model_id: ID of the model applied to the "ja" target language.
        timeout: Seconds to wait for the long-running operation to finish.
            Pass None to wait indefinitely.
    """

    client = translate.TranslationServiceClient()

    # Supported file types: https://cloud.google.com/translate/docs/supported-formats
    gcs_source = {"input_uri": input_uri}
    location = "us-central1"

    input_configs_element = {
        "gcs_source": gcs_source,
        "mime_type": "text/plain",  # Can be "text/plain" or "text/html".
    }
    gcs_destination = {"output_uri_prefix": output_uri}
    output_config = {"gcs_destination": gcs_destination}
    parent = client.location_path(project_id, location)

    model_path = "projects/{}/locations/{}/models/{}".format(
        project_id, location, model_id  # The location of AutoML model.
    )

    # Supported language codes: https://cloud.google.com/translate/docs/languages
    models = {"ja": model_path}  # takes a target lang as key.

    operation = client.batch_translate_text(
        parent=parent,
        source_language_code="en",
        target_language_codes=["ja"],  # Up to 10 language codes here.
        input_configs=[input_configs_element],
        output_config=output_config,
        models=models,
    )

    print(u"Waiting for operation to complete...")
    # Bound the wait so a stuck batch job cannot block the caller forever.
    response = operation.result(timeout)

    # Report how many characters were submitted and how many were translated.
    print(u"Total Characters: {}".format(response.total_characters))
    print(u"Translated Characters: {}".format(response.translated_characters))
65+
66+
67+
# [END translate_v3_batch_translate_text_with_model]
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
# Copyright 2020 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import os
16+
import pytest
17+
import uuid
18+
import translate_v3_batch_translate_text_with_model
19+
from google.cloud import storage
20+
21+
# Project ID handed to the sample under test; read from the GCLOUD_PROJECT
# environment variable, so a missing variable raises KeyError at import time.
PROJECT_ID = os.environ["GCLOUD_PROJECT"]
22+
# Model ID passed to the sample as model_id.
# NOTE(review): presumably a pre-trained AutoML model living in the test
# project's us-central1 location -- confirm it still exists.
MODEL_ID = "TRL3128559826197068699"
23+
24+
25+
@pytest.fixture(scope="function")
def bucket():
    """Yield a freshly created, uniquely named bucket for translation output.

    The bucket is deleted again once the test using it has finished.
    """
    temp_bucket = storage.Client().create_bucket(str(uuid.uuid1()))

    yield temp_bucket

    # force=True asks the client to remove the bucket's objects first.
    temp_bucket.delete(force=True)
35+
36+
37+
def test_batch_translate_text_with_model(capsys, bucket):
    """Run the model sample end to end and check the printed character counts."""
    output_prefix = "gs://{}/translation/BATCH_TRANSLATION_OUTPUT/".format(
        bucket.name
    )

    translate_v3_batch_translate_text_with_model.batch_translate_text_with_model(
        "gs://cloud-samples-data/translation/custom_model_text.txt",
        output_prefix,
        PROJECT_ID,
        MODEL_ID,
    )

    captured = capsys.readouterr()
    assert "Total Characters: 15" in captured.out
    assert "Translated Characters: 15" in captured.out
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
# Copyright 2020 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
# [START translate_v3_batch_translate_text]
16+
from google.cloud import translate
17+
18+
19+
def batch_translate_text(
    input_uri="gs://YOUR_BUCKET_ID/path/to/your/file.txt",
    output_uri="gs://YOUR_BUCKET_ID/path/to/save/results/",
    project_id="YOUR_PROJECT_ID"
):
    """Translate a text file stored on GCS and write the result back to GCS.

    Translates from "en" to "ja", waits up to 90 seconds for the
    long-running operation, then prints the total and translated
    character counts.

    Args:
        input_uri: Cloud Storage URI of the file to translate.
        output_uri: Cloud Storage URI prefix the results are written under.
        project_id: Google Cloud project used for the request.
    """

    translation_client = translate.TranslationServiceClient()

    region = "us-central1"
    parent_path = translation_client.location_path(project_id, region)

    # Supported file types: https://cloud.google.com/translate/docs/supported-formats
    source_config = {
        "gcs_source": {"input_uri": input_uri},
        "mime_type": "text/plain",  # Can be "text/plain" or "text/html".
    }
    destination_config = {"gcs_destination": {"output_uri_prefix": output_uri}}

    # Supported language codes: https://cloud.google.com/translate/docs/languages
    long_running_op = translation_client.batch_translate_text(
        parent=parent_path,
        source_language_code="en",
        target_language_codes=["ja"],  # Up to 10 language codes here.
        input_configs=[source_config],
        output_config=destination_config,
    )

    print(u"Waiting for operation to complete...")
    result = long_running_op.result(90)

    print(u"Total Characters: {}".format(result.total_characters))
    print(u"Translated Characters: {}".format(result.translated_characters))
53+
54+
55+
# [END translate_v3_batch_translate_text]
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
# Copyright 2020 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import os
16+
import pytest
17+
import translate_v3_batch_translate_text
18+
import uuid
19+
from google.cloud import storage
20+
21+
# Project ID handed to the sample under test; read from the GCLOUD_PROJECT
# environment variable, so a missing variable raises KeyError at import time.
PROJECT_ID = os.environ["GCLOUD_PROJECT"]
22+
23+
24+
@pytest.fixture(scope="function")
def bucket():
    """Yield a freshly created, uniquely named bucket for translation output.

    The bucket is deleted again once the test using it has finished.
    """
    temp_bucket = storage.Client().create_bucket(str(uuid.uuid1()))

    yield temp_bucket

    # force=True asks the client to remove the bucket's objects first.
    temp_bucket.delete(force=True)
34+
35+
36+
def test_batch_translate_text(capsys, bucket):
    """Run the plain batch sample end to end and check that totals are printed."""
    output_prefix = "gs://{}/translation/BATCH_TRANSLATION_OUTPUT/".format(
        bucket.name
    )

    translate_v3_batch_translate_text.batch_translate_text(
        "gs://cloud-samples-data/translation/text.txt",
        output_prefix,
        PROJECT_ID,
    )

    captured = capsys.readouterr()
    assert "Total Characters" in captured.out
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
# Copyright 2020 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
16+
# [START translate_v3_batch_translate_text_with_glossary]
17+
from google.cloud import translate
18+
19+
20+
def batch_translate_text_with_glossary(
    input_uri="gs://YOUR_BUCKET_ID/path/to/your/file.txt",
    output_uri="gs://YOUR_BUCKET_ID/path/to/save/results/",
    project_id="YOUR_PROJECT_ID",
    glossary_id="YOUR_GLOSSARY_ID",
):
    """Translate a text file on GCS with a glossary and store the result on GCS.

    Translates from "en" to "ja" with the given glossary applied, waits up
    to 120 seconds for the long-running operation, then prints the total
    and translated character counts.

    Args:
        input_uri: Cloud Storage URI of the file to translate.
        output_uri: Cloud Storage URI prefix the results are written under.
        project_id: Google Cloud project used for the request and for
            building the glossary resource path.
        glossary_id: ID of the glossary applied to the "ja" target language.
    """

    translation_client = translate.TranslationServiceClient()

    # The request parent and the glossary path are both built for this region.
    region = "us-central1"
    parent_path = translation_client.location_path(project_id, region)

    # Supported file types: https://cloud.google.com/translate/docs/supported-formats
    source_config = {
        "gcs_source": {"input_uri": input_uri},
        "mime_type": "text/plain",  # Can be "text/plain" or "text/html".
    }
    destination_config = {"gcs_destination": {"output_uri_prefix": output_uri}}

    # glossary is a custom dictionary Translation API uses
    # to translate the domain-specific terminology.
    glossary_settings = translate.types.TranslateTextGlossaryConfig(
        glossary=translation_client.glossary_path(project_id, region, glossary_id)
    )

    # Supported language codes: https://cloud.google.com/translate/docs/languages
    long_running_op = translation_client.batch_translate_text(
        parent=parent_path,
        source_language_code="en",
        target_language_codes=["ja"],  # Up to 10 language codes here.
        input_configs=[source_config],
        glossaries={"ja": glossary_settings},  # target lang as key
        output_config=destination_config,
    )

    print(u"Waiting for operation to complete...")
    result = long_running_op.result(120)

    print(u"Total Characters: {}".format(result.total_characters))
    print(u"Translated Characters: {}".format(result.translated_characters))
72+
73+
74+
# [END translate_v3_batch_translate_text_with_glossary]
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
# Copyright 2020 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import os
16+
import pytest
17+
import uuid
18+
import translate_v3_batch_translate_text_with_glossary
19+
import translate_v3_create_glossary
20+
import translate_v3_delete_glossary
21+
from google.cloud import storage
22+
23+
# Project ID handed to the samples under test; read from the GCLOUD_PROJECT
# environment variable, so a missing variable raises KeyError at import time.
PROJECT_ID = os.environ["GCLOUD_PROJECT"]
24+
# Cloud Storage URI handed to create_glossary as the glossary input.
# NOTE(review): presumably a CSV of en/ja term pairs -- confirm the format.
GLOSSARY_INPUT_URI = "gs://cloud-samples-data/translation/glossary_ja.csv"
25+
26+
27+
@pytest.fixture(scope="session")
def glossary():
    """Create a uniquely named glossary for the session and yield its ID.

    The glossary is built from GLOSSARY_INPUT_URI in PROJECT_ID and is
    deleted on teardown. Deletion is best effort: a failure is reported as
    a warning instead of failing the session, so leaked glossaries remain
    visible in the test report.
    """
    import warnings

    glossary_id = "must-start-with-letters-" + str(uuid.uuid1())
    translate_v3_create_glossary.create_glossary(
        PROJECT_ID, GLOSSARY_INPUT_URI, glossary_id
    )

    yield glossary_id

    try:
        translate_v3_delete_glossary.delete_glossary(PROJECT_ID, glossary_id)
    except Exception as exc:  # Best-effort cleanup must not fail the session.
        warnings.warn(
            "Could not delete glossary {}: {!r}".format(glossary_id, exc)
        )
41+
42+
43+
@pytest.fixture(scope="function")
def bucket():
    """Yield a freshly created, uniquely named bucket for translation output.

    The bucket is deleted again once the test using it has finished.
    """
    temp_bucket = storage.Client().create_bucket(str(uuid.uuid1()))

    yield temp_bucket

    # force=True asks the client to remove the bucket's objects first.
    temp_bucket.delete(force=True)
53+
54+
55+
def test_batch_translate_text_with_glossary(capsys, bucket, glossary):
    """Run the glossary sample end to end and check the printed character count."""
    output_prefix = "gs://{}/translation/BATCH_TRANSLATION_OUTPUT/".format(
        bucket.name
    )

    translate_v3_batch_translate_text_with_glossary.batch_translate_text_with_glossary(
        "gs://cloud-samples-data/translation/text_with_glossary.txt",
        output_prefix,
        PROJECT_ID,
        glossary,
    )

    captured = capsys.readouterr()
    assert "Total Characters: 9" in captured.out

0 commit comments

Comments
 (0)