diff --git a/language/cloud-client/v1/snippets.py b/language/cloud-client/v1/snippets.py
index 704c6347eac..e13fc7dd6c8 100644
--- a/language/cloud-client/v1/snippets.py
+++ b/language/cloud-client/v1/snippets.py
@@ -22,10 +22,12 @@
 """
 
 import argparse
+import sys
 
 from google.cloud import language
 from google.cloud.language import enums
 from google.cloud.language import types
+
 import six
 
 
@@ -192,12 +194,82 @@ def syntax_file(gcs_uri):
 # [END def_syntax_file]
 
 
+# [START def_entity_sentiment_text]
+def entity_sentiment_text(text):
+    """Detects entity sentiment in the provided text."""
+    client = language.LanguageServiceClient()
+
+    # Normalize byte-string input to text before building the document.
+    if isinstance(text, six.binary_type):
+        text = text.decode('utf-8')
+
+    document = types.Document(
+        content=text.encode('utf-8'),
+        type=enums.Document.Type.PLAIN_TEXT)
+
+    # Detect and send native Python encoding to receive correct word offsets.
+    encoding = enums.EncodingType.UTF32
+    if sys.maxunicode == 65535:
+        encoding = enums.EncodingType.UTF16
+
+    result = client.analyze_entity_sentiment(document, encoding)
+
+    for entity in result.entities:
+        print(u'Name: "{}"'.format(entity.name))
+        print('Mentions: ')
+        for mention in entity.mentions:
+            print(u' Begin Offset : {}'.format(mention.text.begin_offset))
+            print(u' Content : {}'.format(mention.text.content))
+            print(u' Magnitude : {}'.format(mention.sentiment.magnitude))
+            print(u' Sentiment : {}'.format(mention.sentiment.score))
+            print(u' Type : {}'.format(mention.type))
+        print(u'Salience: {}'.format(entity.salience))
+        print(u'Sentiment: {}\n'.format(entity.sentiment))
+# [END def_entity_sentiment_text]
+
+
+def entity_sentiment_file(gcs_uri):
+    """Detects entity sentiment in a Google Cloud Storage file."""
+    client = language.LanguageServiceClient()
+
+    document = types.Document(
+        gcs_content_uri=gcs_uri,
+        type=enums.Document.Type.PLAIN_TEXT)
+
+    # Detect and send native Python encoding to receive correct word offsets.
+    encoding = enums.EncodingType.UTF32
+    if sys.maxunicode == 65535:
+        encoding = enums.EncodingType.UTF16
+
+    result = client.analyze_entity_sentiment(document, encoding)
+
+    for entity in result.entities:
+        print(u'Name: "{}"'.format(entity.name))
+        print('Mentions: ')
+        for mention in entity.mentions:
+            print(u' Begin Offset : {}'.format(mention.text.begin_offset))
+            print(u' Content : {}'.format(mention.text.content))
+            print(u' Magnitude : {}'.format(mention.sentiment.magnitude))
+            print(u' Sentiment : {}'.format(mention.sentiment.score))
+            print(u' Type : {}'.format(mention.type))
+        print(u'Salience: {}'.format(entity.salience))
+        print(u'Sentiment: {}\n'.format(entity.sentiment))
+
+
 if __name__ == '__main__':
     parser = argparse.ArgumentParser(
         description=__doc__,
         formatter_class=argparse.RawDescriptionHelpFormatter)
     subparsers = parser.add_subparsers(dest='command')
 
+    sentiment_entities_text_parser = subparsers.add_parser(
+        'sentiment-entities-text', help=entity_sentiment_text.__doc__)
+    sentiment_entities_text_parser.add_argument('text')
+
+    sentiment_entities_file_parser = subparsers.add_parser(
+        'sentiment-entities-file', help=entity_sentiment_file.__doc__)
+    sentiment_entities_file_parser.add_argument('gcs_uri')
+
     sentiment_text_parser = subparsers.add_parser(
         'sentiment-text', help=sentiment_text.__doc__)
     sentiment_text_parser.add_argument('text')
@@ -236,3 +308,7 @@ def syntax_file(gcs_uri):
         syntax_text(args.text)
     elif args.command == 'syntax-file':
         syntax_file(args.gcs_uri)
+    elif args.command == 'sentiment-entities-text':
+        entity_sentiment_text(args.text)
+    elif args.command == 'sentiment-entities-file':
+        entity_sentiment_file(args.gcs_uri)
diff --git a/language/cloud-client/v1/snippets_test.py b/language/cloud-client/v1/snippets_test.py
index 8bbdaf9deb3..168701dc666 100644
--- a/language/cloud-client/v1/snippets_test.py
+++ b/language/cloud-client/v1/snippets_test.py
@@ -1,4 +1,5 @@
-# Copyright 2016 Google, Inc.
+# -*- coding: utf-8 -*-
+# Copyright 2017 Google, Inc.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -56,3 +57,23 @@ def test_syntax_file(capsys):
     snippets.syntax_file(TEST_FILE_URL)
     out, _ = capsys.readouterr()
     assert 'NOUN: President' in out
+
+
+def test_sentiment_entities_text(capsys):
+    snippets.entity_sentiment_text(
+        'President Obama is speaking at the White House.')
+    out, _ = capsys.readouterr()
+    assert 'Content : White House' in out
+
+
+def test_sentiment_entities_file(capsys):
+    snippets.entity_sentiment_file(TEST_FILE_URL)
+    out, _ = capsys.readouterr()
+    assert 'Content : White House' in out
+
+
+def test_sentiment_entities_utf(capsys):
+    snippets.entity_sentiment_text(
+        'foo→bar')
+    out, _ = capsys.readouterr()
+    assert 'Begin Offset : 4' in out
diff --git a/language/cloud-client/v1beta2/resources/android_text.txt b/language/cloud-client/v1beta2/resources/android_text.txt
new file mode 100644
index 00000000000..c05c452dc00
--- /dev/null
+++ b/language/cloud-client/v1beta2/resources/android_text.txt
@@ -0,0 +1 @@
+Android is a mobile operating system developed by Google, based on the Linux kernel and designed primarily for touchscreen mobile devices such as smartphones and tablets.
diff --git a/language/cloud-client/v1beta2/snippets.py b/language/cloud-client/v1beta2/snippets.py
index 0ea15f7fc42..3ccc2933cd7 100644
--- a/language/cloud-client/v1beta2/snippets.py
+++ b/language/cloud-client/v1beta2/snippets.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python
 
-# Copyright 2017 Google, Inc.
+# Copyright 2016 Google, Inc.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -22,7 +22,6 @@
 """
 
 import argparse
-import sys
 
 # [START beta_import]
 from google.cloud import language_v1beta2
@@ -173,9 +172,9 @@ def syntax_file(gcs_uri):
                                token.text.content))
 
 
-# [START def_entity_sentiment_text]
-def entity_sentiment_text(text):
-    """Detects entity sentiment in the provided text."""
+# [START def_classify_text]
+def classify_text(text):
+    """Classifies the provided text."""
     # [START beta_client]
     client = language_v1beta2.LanguageServiceClient()
     # [END beta_client]
@@ -187,52 +186,31 @@ def entity_sentiment_text(text):
         content=text.encode('utf-8'),
         type=enums.Document.Type.PLAIN_TEXT)
 
-    # Pass in encoding type to get useful offsets in the response.
-    encoding = enums.EncodingType.UTF32
-    if sys.maxunicode == 65535:
-        encoding = enums.EncodingType.UTF16
-
-    result = client.analyze_entity_sentiment(document, encoding)
-
-    for entity in result.entities:
-        print('Mentions: ')
-        print(u'Name: "{}"'.format(entity.name))
-        for mention in entity.mentions:
-            print(u' Begin Offset : {}'.format(mention.text.begin_offset))
-            print(u' Content : {}'.format(mention.text.content))
-            print(u' Magnitude : {}'.format(mention.sentiment.magnitude))
-            print(u' Sentiment : {}'.format(mention.sentiment.score))
-            print(u' Type : {}'.format(mention.type))
-        print(u'Salience: {}'.format(entity.salience))
-        print(u'Sentiment: {}\n'.format(entity.sentiment))
-# [END def_entity_sentiment_text]
-
-
-def entity_sentiment_file(gcs_uri):
-    """Detects entity sentiment in a Google Cloud Storage file."""
+    categories = client.classify_text(document).categories
+
+    for category in categories:
+        print(u'=' * 20)
+        print(u'{:<16}: {}'.format('name', category.name))
+        print(u'{:<16}: {}'.format('confidence', category.confidence))
+# [END def_classify_text]
+
+
+# [START def_classify_file]
+def classify_file(gcs_uri):
+    """Classifies the text in a Google Cloud Storage file."""
     client = language_v1beta2.LanguageServiceClient()
 
     document = types.Document(
         gcs_content_uri=gcs_uri,
         type=enums.Document.Type.PLAIN_TEXT)
 
-    # Pass in encoding type to get useful offsets in the response.
-    encoding = enums.EncodingType.UTF32
-    if sys.maxunicode == 65535:
-        encoding = enums.EncodingType.UTF16
-
-    result = client.analyze_entity_sentiment(document, encoding)
+    categories = client.classify_text(document).categories
 
-    for entity in result.entities:
-        print(u'Name: "{}"'.format(entity.name))
-        for mention in entity.mentions:
-            print(u' Begin Offset : {}'.format(mention.text.begin_offset))
-            print(u' Content : {}'.format(mention.text.content))
-            print(u' Magnitude : {}'.format(mention.sentiment.magnitude))
-            print(u' Sentiment : {}'.format(mention.sentiment.score))
-            print(u' Type : {}'.format(mention.type))
-        print(u'Salience: {}'.format(entity.salience))
-        print(u'Sentiment: {}\n'.format(entity.sentiment))
+    for category in categories:
+        print(u'=' * 20)
+        print(u'{:<16}: {}'.format('name', category.name))
+        print(u'{:<16}: {}'.format('confidence', category.confidence))
+# [END def_classify_file]
 
 
 if __name__ == '__main__':
@@ -241,13 +219,13 @@ def entity_sentiment_file(gcs_uri):
         formatter_class=argparse.RawDescriptionHelpFormatter)
     subparsers = parser.add_subparsers(dest='command')
 
-    sentiment_entities_text_parser = subparsers.add_parser(
-        'sentiment-entities-text', help=entity_sentiment_text.__doc__)
-    sentiment_entities_text_parser.add_argument('text')
+    classify_text_parser = subparsers.add_parser(
+        'classify-text', help=classify_text.__doc__)
+    classify_text_parser.add_argument('text')
 
-    sentiment_entities_file_parser = subparsers.add_parser(
-        'sentiment-entities-file', help=entity_sentiment_file.__doc__)
-    sentiment_entities_file_parser.add_argument('gcs_uri')
+    classify_file_parser = subparsers.add_parser(
+        'classify-file', help=classify_file.__doc__)
+    classify_file_parser.add_argument('gcs_uri')
 
     sentiment_text_parser = subparsers.add_parser(
         'sentiment-text', help=sentiment_text.__doc__)
@@ -287,7 +265,7 @@ def entity_sentiment_file(gcs_uri):
         syntax_text(args.text)
     elif args.command == 'syntax-file':
         syntax_file(args.gcs_uri)
-    elif args.command == 'sentiment-entities-text':
-        entity_sentiment_text(args.text)
-    elif args.command == 'sentiment-entities-file':
-        entity_sentiment_file(args.gcs_uri)
+    elif args.command == 'classify-text':
+        classify_text(args.text)
+    elif args.command == 'classify-file':
+        classify_file(args.gcs_uri)
diff --git a/language/cloud-client/v1beta2/snippets_test.py b/language/cloud-client/v1beta2/snippets_test.py
index e6db221780b..d440136b501 100644
--- a/language/cloud-client/v1beta2/snippets_test.py
+++ b/language/cloud-client/v1beta2/snippets_test.py
@@ -19,6 +19,7 @@
 
 BUCKET = os.environ['CLOUD_STORAGE_BUCKET']
 TEST_FILE_URL = 'gs://{}/text.txt'.format(BUCKET)
+LONG_TEST_FILE_URL = 'gs://{}/android_text.txt'.format(BUCKET)
 
 
 def test_sentiment_text(capsys):
@@ -68,21 +69,18 @@ def test_syntax_file(capsys):
     assert 'NOUN: President' in out
 
 
-def test_sentiment_entities_text(capsys):
-    snippets.entity_sentiment_text(
-        'President Obama is speaking at the White House.')
+def test_classify_text(capsys):
+    snippets.classify_text(
+        'Android is a mobile operating system developed by Google, '
+        'based on the Linux kernel and designed primarily for touchscreen '
+        'mobile devices such as smartphones and tablets.')
     out, _ = capsys.readouterr()
-    assert 'Content : White House' in out
-
-
-def test_sentiment_entities_file(capsys):
-    snippets.entity_sentiment_file(TEST_FILE_URL)
-    out, _ = capsys.readouterr()
-    assert 'Content : White House' in out
+    assert 'name' in out
+    assert '/Computers & Electronics' in out
 
 
-def test_sentiment_entities_utf(capsys):
-    snippets.entity_sentiment_text(
-        'foo→bar')
+def test_classify_file(capsys):
+    snippets.classify_file(LONG_TEST_FILE_URL)
     out, _ = capsys.readouterr()
-    assert 'Begin Offset : 4' in out
+    assert 'name' in out
+    assert '/Computers & Electronics' in out