diff --git a/language/cloud-client/v1beta2/snippets.py b/language/cloud-client/v1beta2/snippets.py index 2e6745d2c94..c9f6f32ad5a 100644 --- a/language/cloud-client/v1beta2/snippets.py +++ b/language/cloud-client/v1beta2/snippets.py @@ -22,6 +22,7 @@ """ import argparse +import sys from google.cloud import language from google.cloud.gapic.language.v1beta2 import enums @@ -53,7 +54,7 @@ def sentiment_file(gcs_uri): language_client = language.Client(api_version='v1beta2') # Instantiates a plain text document. - document = language_client.document_from_url(gcs_uri) + document = language_client.document_from_gcs_url(gcs_uri) # Detects sentiment in the document. You can also analyze HTML with: # document.doc_type == language.Document.HTML @@ -92,7 +93,7 @@ def entities_file(gcs_uri): language_client = language.Client(api_version='v1beta2') # Instantiates a plain text document. - document = language_client.document_from_url(gcs_uri) + document = language_client.document_from_gcs_url(gcs_uri) # Detects sentiment in the document. You can also analyze HTML with: # document.doc_type == language.Document.HTML @@ -131,7 +132,7 @@ def syntax_file(gcs_uri): language_client = language.Client(api_version='v1beta2') # Instantiates a plain text document. - document = language_client.document_from_url(gcs_uri) + document = language_client.document_from_gcs_url(gcs_uri) # Detects syntax in the document. You can also analyze HTML with: # document.doc_type == language.Document.HTML @@ -152,8 +153,12 @@ def entity_sentiment_text(text): document.content = text.encode('utf-8') document.type = enums.Document.Type.PLAIN_TEXT + encoding = enums.EncodingType.UTF32 + if sys.maxunicode == 65535: + encoding = enums.EncodingType.UTF16 + result = language_client.analyze_entity_sentiment( - document, enums.EncodingType.UTF8) + document, encoding) for entity in result.entities: print('Mentions: ') @@ -176,8 +181,12 @@ def entity_sentiment_file(gcs_uri): document.gcs_content_uri = gcs_uri document.type = enums.Document.Type.PLAIN_TEXT + encoding = enums.EncodingType.UTF32 + if sys.maxunicode == 65535: + encoding = enums.EncodingType.UTF16 + result = language_client.analyze_entity_sentiment( - document, enums.EncodingType.UTF8) + document, encoding) for entity in result.entities: print(u'Name: "{}"'.format(entity.name)) diff --git a/language/cloud-client/v1beta2/snippets_test.py b/language/cloud-client/v1beta2/snippets_test.py index 8db7aa1dbdf..e6db221780b 100644 --- a/language/cloud-client/v1beta2/snippets_test.py +++ b/language/cloud-client/v1beta2/snippets_test.py @@ -79,3 +79,10 @@ def test_sentiment_entities_file(capsys): snippets.entity_sentiment_file(TEST_FILE_URL) out, _ = capsys.readouterr() assert 'Content : White House' in out + + +def test_sentiment_entities_utf(capsys): + snippets.entity_sentiment_text( + 'foo→bar') + out, _ = capsys.readouterr() + assert 'Begin Offset : 4' in out