From 05bc2f07a225be3cd90995bab979b9de0dfa0a73 Mon Sep 17 00:00:00 2001 From: Tom Manshreck Date: Thu, 15 Sep 2016 16:29:08 -0700 Subject: [PATCH 1/7] Add sentiment analysis sample --- language/sentiment/README.md | 48 ++++++++++++++++ language/sentiment/mixed.txt | 20 +++++++ language/sentiment/neg.txt | 4 ++ language/sentiment/neutral.txt | 3 + language/sentiment/pos.txt | 11 ++++ language/sentiment/requirements.txt | 2 + language/sentiment/sentiment_analysis.py | 55 +++++++++++++++++++ language/sentiment/sentiment_analysis_test.py | 47 ++++++++++++++++ 8 files changed, 190 insertions(+) create mode 100644 language/sentiment/README.md create mode 100644 language/sentiment/mixed.txt create mode 100644 language/sentiment/neg.txt create mode 100644 language/sentiment/neutral.txt create mode 100644 language/sentiment/pos.txt create mode 100644 language/sentiment/requirements.txt create mode 100644 language/sentiment/sentiment_analysis.py create mode 100644 language/sentiment/sentiment_analysis_test.py diff --git a/language/sentiment/README.md b/language/sentiment/README.md new file mode 100644 index 00000000000..f267590bd0c --- /dev/null +++ b/language/sentiment/README.md @@ -0,0 +1,48 @@ +# Introduction + +This sample contains the code referenced in the +[Sentiment Analysis Tutorial](http://cloud.google.com/natural-language/docs/sentiment-tutorial) +within the Google Cloud Natural Language API Documentation. A full walkthrough of this sample +is located within the documentation. + +This sample is a simple illustration of how to construct a sentiment analysis +request and process a response using the API. + +## Prerequisites + +1. Install the [Google Cloud SDK](https://cloud.google.com/sdk/), including the [gcloud tool](https://cloud.google.com/sdk/gcloud/), and [gcloud app component](https://cloud.google.com/sdk/gcloud-app). + +2. Setup the gcloud tool. This provides authentication to Google Cloud APIs and services. 
+ +``` +$ gcloud init +``` + + +## Download the Code + +``` +$ git clone https://github.com/GoogleCloudPlatform/python-docs-samples.git +$ cd python-docs-samples/language/sentiment +``` + +## Run the Code + +Open a sample folder, create a virtualenv, install dependencies, and run the sample: + +``` +$ virtualenv env +$ source env/bin/activate +(env)$ pip install -r requirements.txt +``` + +### Usage + +This sample provides four sample movie reviews which you can +provide to the sample on the command line. (You can also +pass your own text files.) + +``` +(env)$ python sentiment_analysis.py textfile.txt +Sentiment: polarity of -0.1 with magnitude of 6.7 +``` diff --git a/language/sentiment/mixed.txt b/language/sentiment/mixed.txt new file mode 100644 index 00000000000..d4a42aa2928 --- /dev/null +++ b/language/sentiment/mixed.txt @@ -0,0 +1,20 @@ +I really wanted to love 'Bladerunner' but ultimately I couldn't get +myself to appreciate it fully. However, you may like it if you're into +science fiction, especially if you're interested in the philosophical +exploration of what it means to be human or machine. Some of the gizmos +like the flying cars and the Vouight-Kampff machine (which seemed very +steampunk), were quite cool. + +I did find the plot pretty slow and but the dialogue and action sequences +were good. Unlike most science fiction films, this one was mostly quiet, and +not all that much happened, except during the last 15 minutes. I didn't +understand why a unicorn was in the movie. The visual effects were fantastic, +however, and the musical score and overall mood was quite interesting. +A futurist Los Angeles that was both highly polished and also falling apart +reminded me of 'Outland.' Certainly, the style of the film made up for +many of its pedantic plot holes. + +If you want your sci-fi to be lasers and spaceships, 'Bladerunner' may +disappoint you. But if you want it to make you think, this movie may +be worth the money. 
+ diff --git a/language/sentiment/neg.txt b/language/sentiment/neg.txt new file mode 100644 index 00000000000..5dcbec0f8c5 --- /dev/null +++ b/language/sentiment/neg.txt @@ -0,0 +1,4 @@ +What was Hollywood thinking with this movie! I hated, +hated, hated it. BORING! I went afterwards and demanded my money back. +They refused. + diff --git a/language/sentiment/neutral.txt b/language/sentiment/neutral.txt new file mode 100644 index 00000000000..89839ef25cf --- /dev/null +++ b/language/sentiment/neutral.txt @@ -0,0 +1,3 @@ +I neither liked nor disliked this movie. Parts were interesting, but +overall I was left wanting more. The acting was pretty good. + diff --git a/language/sentiment/pos.txt b/language/sentiment/pos.txt new file mode 100644 index 00000000000..5f211496775 --- /dev/null +++ b/language/sentiment/pos.txt @@ -0,0 +1,11 @@ +`Bladerunner` is often touted as one of the best science fiction films ever +made. Indeed, it satisfies many of the requisites for good sci-fi: a future +world with flying cars and humanoid robots attempting to rebel against their +creators. But more than anything, `Bladerunner` is a fantastic exploration +of the nature of what it means to be human. If we create robots which can +think, will they become human? And if they do, what makes us unique? Indeed, +how can we be sure we're not human in any case? `Bladerunner` explored +these issues before such movies as `The Matrix,' and did so intelligently. +The visual effects and score by Vangelis set the mood. See this movie +in a dark theatre to appreciate it fully. Highly recommended! 
+ diff --git a/language/sentiment/requirements.txt b/language/sentiment/requirements.txt new file mode 100644 index 00000000000..dc1d6a1d6ef --- /dev/null +++ b/language/sentiment/requirements.txt @@ -0,0 +1,2 @@ +google-api-python-client==1.5.3 + diff --git a/language/sentiment/sentiment_analysis.py b/language/sentiment/sentiment_analysis.py new file mode 100644 index 00000000000..a60ed87241d --- /dev/null +++ b/language/sentiment/sentiment_analysis.py @@ -0,0 +1,55 @@ +# Copyright 2016, Google, Inc. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +'''Demonstrates how to make a simple call to the Natural Language API''' + +import argparse +from googleapiclient import discovery +from oauth2client.client import GoogleCredentials + + +def main(movie_review_filename): + '''Run a sentiment analysis request on text within a passed filename.''' + + credentials = GoogleCredentials.get_application_default() + service = discovery.build('language', 'v1beta1', credentials=credentials) + + with open(movie_review_filename, 'r') as review_file: + service_request = service.documents().analyzeSentiment( + body={ + 'document': { + 'type': 'PLAIN_TEXT', + 'content': review_file.read(), + } + } + ) + response = service_request.execute() + + polarity = response['documentSentiment']['polarity'] + magnitude = response['documentSentiment']['magnitude'] + + print('Sentiment: polarity of {} with magnitude of {}'.format( + polarity, magnitude)) + return 0 + + +if __name__ == '__main__': + parser = argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter) + parser.add_argument( + 'movie_review_filename', + help='The filename of the movie review you\'d like to analyze.') + args = parser.parse_args() + main(args.movie_review_filename) + diff --git a/language/sentiment/sentiment_analysis_test.py b/language/sentiment/sentiment_analysis_test.py new file mode 100644 index 00000000000..7d86402aa37 --- /dev/null +++ b/language/sentiment/sentiment_analysis_test.py @@ -0,0 +1,47 @@ +# Copyright 2016, Google, Inc. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import re +from sentiment_analysis import main + + +def test_pos(resource, capsys): + main(resource('pos.txt')) + out, err = capsys.readouterr() + polarity = re.search('polarity of (.+?) with', out) + magnitude = re.search('magnitude of (.+?)', out) + assert polarity * magnitude > 0 + + +def test_neg(resource, capsys): + main(resource('neg.txt')) + out, err = capsys.readouterr() + polarity = re.search('polarity of (.+?) with', out) + magnitude = re.search('magnitude of (.+?)', out) + assert polarity * magnitude < 0 + + +def test_mixed(resource, capsys): + main(resource('mixed.txt')) + out, err = capsys.readouterr() + polarity = re.search('polarity of (.+?) with', out) + assert polarity <= 0.3 is True + assert polarity >= -0.3 is True + + +def test_neutral(resource, capsys): + main(resource('neutral.txt')) + out, err = capsys.readouterr() + magnitude = re.search('magnitude of (.+?)', out) + assert magnitude <= 2.0 is True + From 82a037945e441983521567ba62753720587f637f Mon Sep 17 00:00:00 2001 From: Tom Manshreck Date: Thu, 15 Sep 2016 16:39:41 -0700 Subject: [PATCH 2/7] Move sample review files into resources directory --- language/sentiment/{ => resources}/mixed.txt | 0 language/sentiment/{ => resources}/neg.txt | 0 language/sentiment/{ => resources}/neutral.txt | 0 language/sentiment/{ => resources}/pos.txt | 0 4 files changed, 0 insertions(+), 0 deletions(-) rename language/sentiment/{ => resources}/mixed.txt (100%) rename language/sentiment/{ => resources}/neg.txt (100%) rename language/sentiment/{ => resources}/neutral.txt (100%) rename language/sentiment/{ => resources}/pos.txt (100%) diff --git a/language/sentiment/mixed.txt b/language/sentiment/resources/mixed.txt similarity index 100% rename from language/sentiment/mixed.txt rename to language/sentiment/resources/mixed.txt diff --git a/language/sentiment/neg.txt b/language/sentiment/resources/neg.txt 
similarity index 100% rename from language/sentiment/neg.txt rename to language/sentiment/resources/neg.txt diff --git a/language/sentiment/neutral.txt b/language/sentiment/resources/neutral.txt similarity index 100% rename from language/sentiment/neutral.txt rename to language/sentiment/resources/neutral.txt diff --git a/language/sentiment/pos.txt b/language/sentiment/resources/pos.txt similarity index 100% rename from language/sentiment/pos.txt rename to language/sentiment/resources/pos.txt From 75cab6fdbdbfc63e35e46665bae95ab61f75e54f Mon Sep 17 00:00:00 2001 From: Tom Manshreck Date: Thu, 15 Sep 2016 17:09:03 -0700 Subject: [PATCH 3/7] Remove blank line from end of file --- language/sentiment/sentiment_analysis.py | 1 - language/sentiment/sentiment_analysis_test.py | 1 - 2 files changed, 2 deletions(-) diff --git a/language/sentiment/sentiment_analysis.py b/language/sentiment/sentiment_analysis.py index a60ed87241d..8e250881305 100644 --- a/language/sentiment/sentiment_analysis.py +++ b/language/sentiment/sentiment_analysis.py @@ -52,4 +52,3 @@ def main(movie_review_filename): help='The filename of the movie review you\'d like to analyze.') args = parser.parse_args() main(args.movie_review_filename) - diff --git a/language/sentiment/sentiment_analysis_test.py b/language/sentiment/sentiment_analysis_test.py index 7d86402aa37..e03ae78a993 100644 --- a/language/sentiment/sentiment_analysis_test.py +++ b/language/sentiment/sentiment_analysis_test.py @@ -44,4 +44,3 @@ def test_neutral(resource, capsys): out, err = capsys.readouterr() magnitude = re.search('magnitude of (.+?)', out) assert magnitude <= 2.0 is True - From 31d1c3c2364fb3cdc7365bf8cd2294fac4e82b2d Mon Sep 17 00:00:00 2001 From: Tom Manshreck Date: Thu, 15 Sep 2016 17:22:34 -0700 Subject: [PATCH 4/7] Update set up instructions to point to getting started guide --- language/sentiment/README.md | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/language/sentiment/README.md 
b/language/sentiment/README.md index f267590bd0c..d32be3f2521 100644 --- a/language/sentiment/README.md +++ b/language/sentiment/README.md @@ -12,12 +12,7 @@ request and process a response using the API. 1. Install the [Google Cloud SDK](https://cloud.google.com/sdk/), including the [gcloud tool](https://cloud.google.com/sdk/gcloud/), and [gcloud app component](https://cloud.google.com/sdk/gcloud-app). -2. Setup the gcloud tool. This provides authentication to Google Cloud APIs and services. - -``` -$ gcloud init -``` - +2. Set up your [Cloud Natural Language API project](https://cloud.google.com/natural-language/docs/getting-started#set_up_a_project), which includes setting up a service account, and ensuring you've properly set up your `GOOGLE_APPLICATION_CREDENTIALS` for proper authentication to the service. ## Download the Code From 6657fb67ecda5fb37ef3ec5727ba13aa3a2f3ea9 Mon Sep 17 00:00:00 2001 From: Tom Manshreck Date: Fri, 16 Sep 2016 12:46:43 -0700 Subject: [PATCH 5/7] Update README to remove need to set up gcloud. Itemize what setting up a project entails. --- language/sentiment/README.md | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/language/sentiment/README.md b/language/sentiment/README.md index d32be3f2521..e77cdf16bef 100644 --- a/language/sentiment/README.md +++ b/language/sentiment/README.md @@ -10,9 +10,14 @@ request and process a response using the API. ## Prerequisites -1. Install the [Google Cloud SDK](https://cloud.google.com/sdk/), including the [gcloud tool](https://cloud.google.com/sdk/gcloud/), and [gcloud app component](https://cloud.google.com/sdk/gcloud-app). - -2. Set up your [Cloud Natural Language API project](https://cloud.google.com/natural-language/docs/getting-started#set_up_a_project), which includes setting up a service account, and ensuring you've properly set up your `GOOGLE_APPLICATION_CREDENTIALS` for proper authentication to the service. 
+Set up your +[Cloud Natural Language API project](https://cloud.google.com/natural-language/docs/getting-started#set_up_a_project), +which includes: + +* Enabling the Natural Language API +* Setting up a service account +* Ensuring you've properly set up your `GOOGLE_APPLICATION_CREDENTIALS` for proper + authentication to the service. ## Download the Code From 4b7201c56a64dfaa9ab4cb3ce56954369ceebb0e Mon Sep 17 00:00:00 2001 From: Tom Manshreck Date: Fri, 16 Sep 2016 14:10:35 -0700 Subject: [PATCH 6/7] Update NL README to link to Sentiment tutorial code --- language/README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/language/README.md b/language/README.md index e63d45eb9a6..1e4a6401bbd 100644 --- a/language/README.md +++ b/language/README.md @@ -13,5 +13,9 @@ to extract text from images, then uses the NL API to extract entity information from those texts, and stores the extracted information in a database in support of further analysis and correlation. +- [sentiment](sentiment) contains the [Sentiment Analysis + Tutorial](https://cloud.google.com/natural-language/docs/sentiment-tutorial) +code as used within the documentation. + - [syntax_triples](syntax_triples) uses syntax analysis to find subject-verb-object triples in a given piece of text. From 72dcdc9a7bc9ba65399d5268f42043b52b34932a Mon Sep 17 00:00:00 2001 From: Jerjou Cheng Date: Fri, 16 Sep 2016 20:59:43 -0700 Subject: [PATCH 7/7] Coerce number types before comparison --- language/sentiment/sentiment_analysis_test.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/language/sentiment/sentiment_analysis_test.py b/language/sentiment/sentiment_analysis_test.py index e03ae78a993..d6b6a7abfea 100644 --- a/language/sentiment/sentiment_analysis_test.py +++ b/language/sentiment/sentiment_analysis_test.py @@ -18,29 +18,29 @@ def test_pos(resource, capsys): main(resource('pos.txt')) out, err = capsys.readouterr() - polarity = re.search('polarity of (.+?) 
with', out) - magnitude = re.search('magnitude of (.+?)', out) + polarity = float(re.search('polarity of (.+?) with', out).group(1)) + magnitude = float(re.search('magnitude of (.+?)', out).group(1)) assert polarity * magnitude > 0 def test_neg(resource, capsys): main(resource('neg.txt')) out, err = capsys.readouterr() - polarity = re.search('polarity of (.+?) with', out) - magnitude = re.search('magnitude of (.+?)', out) + polarity = float(re.search('polarity of (.+?) with', out).group(1)) + magnitude = float(re.search('magnitude of (.+?)', out).group(1)) assert polarity * magnitude < 0 def test_mixed(resource, capsys): main(resource('mixed.txt')) out, err = capsys.readouterr() - polarity = re.search('polarity of (.+?) with', out) - assert polarity <= 0.3 is True - assert polarity >= -0.3 is True + polarity = float(re.search('polarity of (.+?) with', out).group(1)) + assert polarity <= 0.3 + assert polarity >= -0.3 def test_neutral(resource, capsys): main(resource('neutral.txt')) out, err = capsys.readouterr() - magnitude = re.search('magnitude of (.+?)', out) - assert magnitude <= 2.0 is True + magnitude = float(re.search('magnitude of (.+?)', out).group(1)) + assert magnitude <= 2.0