Skip to content

Commit 0f884aa

Browse files
gguuss authored and busunkim96 committed
Adds test for encoded characters. [(#961)](#961)
1 parent b861e32 commit 0f884aa

File tree

2 files changed

+21
-5
lines changed

2 files changed

+21
-5
lines changed

language/snippets/cloud-client/v1beta2/snippets.py

Lines changed: 14 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,7 @@
2222
"""
2323

2424
import argparse
25+
import sys
2526

2627
from google.cloud import language
2728
from google.cloud.gapic.language.v1beta2 import enums
@@ -53,7 +54,7 @@ def sentiment_file(gcs_uri):
5354
language_client = language.Client(api_version='v1beta2')
5455

5556
# Instantiates a plain text document.
56-
document = language_client.document_from_url(gcs_uri)
57+
document = language_client.document_from_gcs_url(gcs_uri)
5758

5859
# Detects sentiment in the document. You can also analyze HTML with:
5960
# document.doc_type == language.Document.HTML
@@ -92,7 +93,7 @@ def entities_file(gcs_uri):
9293
language_client = language.Client(api_version='v1beta2')
9394

9495
# Instantiates a plain text document.
95-
document = language_client.document_from_url(gcs_uri)
96+
document = language_client.document_from_gcs_url(gcs_uri)
9697

9798
# Detects entities in the document. You can also analyze HTML with:
9899
# document.doc_type == language.Document.HTML
@@ -131,7 +132,7 @@ def syntax_file(gcs_uri):
131132
language_client = language.Client(api_version='v1beta2')
132133

133134
# Instantiates a plain text document.
134-
document = language_client.document_from_url(gcs_uri)
135+
document = language_client.document_from_gcs_url(gcs_uri)
135136

136137
# Detects syntax in the document. You can also analyze HTML with:
137138
# document.doc_type == language.Document.HTML
@@ -152,8 +153,12 @@ def entity_sentiment_text(text):
152153
document.content = text.encode('utf-8')
153154
document.type = enums.Document.Type.PLAIN_TEXT
154155

156+
encoding = enums.EncodingType.UTF32
157+
if sys.maxunicode == 65535:
158+
encoding = enums.EncodingType.UTF16
159+
155160
result = language_client.analyze_entity_sentiment(
156-
document, enums.EncodingType.UTF8)
161+
document, encoding)
157162

158163
for entity in result.entities:
159164
print('Mentions: ')
@@ -176,8 +181,12 @@ def entity_sentiment_file(gcs_uri):
176181
document.gcs_content_uri = gcs_uri
177182
document.type = enums.Document.Type.PLAIN_TEXT
178183

184+
encoding = enums.EncodingType.UTF32
185+
if sys.maxunicode == 65535:
186+
encoding = enums.EncodingType.UTF16
187+
179188
result = language_client.analyze_entity_sentiment(
180-
document, enums.EncodingType.UTF8)
189+
document, encoding)
181190

182191
for entity in result.entities:
183192
print(u'Name: "{}"'.format(entity.name))

language/snippets/cloud-client/v1beta2/snippets_test.py

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -79,3 +79,10 @@ def test_sentiment_entities_file(capsys):
7979
snippets.entity_sentiment_file(TEST_FILE_URL)
8080
out, _ = capsys.readouterr()
8181
assert 'Content : White House' in out
82+
83+
84+
def test_sentiment_entities_utf(capsys):
85+
snippets.entity_sentiment_text(
86+
'foo→bar')
87+
out, _ = capsys.readouterr()
88+
assert 'Begin Offset : 4' in out

0 commit comments

Comments
 (0)