Skip to content

Movie sample changes (in progress) - DO NOT MERGE before Nov 15. #597

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 16 commits into from
Nov 15, 2016
70 changes: 18 additions & 52 deletions language/movie_nl/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

from googleapiclient import discovery
from googleapiclient.errors import HttpError
import httplib2
from oauth2client.client import GoogleCredentials
import requests

Expand All @@ -30,10 +31,7 @@ def analyze_document(service, document):
the movie name."""
logging.info('Analyzing {}'.format(document.doc_id))

sentences, entities = document.extract_all_sentences(service)

sentiments = [get_sentiment(service, sentence) for sentence in sentences]

sentiments, entities = document.extract_sentiment_entities(service)
return sentiments, entities


Expand All @@ -56,62 +54,35 @@ def get_request_body(text, syntax=True, entities=True, sentiment=True):
return body


def get_sentiment(service, sentence):
"""Get the sentence-level sentiment."""
body = get_request_body(
sentence, syntax=False, entities=True, sentiment=True)

docs = service.documents()
request = docs.annotateText(body=body)

response = request.execute(num_retries=3)

sentiment = response.get('documentSentiment')

if sentiment is None:
return (None, None)
else:
pol = sentiment.get('polarity')
mag = sentiment.get('magnitude')

if pol is None and mag is not None:
pol = 0
return (pol, mag)


class Document(object):
"""Document class captures a single document of movie reviews."""

def __init__(self, text, doc_id, doc_path):
self.text = text
self.doc_id = doc_id
self.doc_path = doc_path
self.sentence_entity_pair = None
self.sentiment_entity_pair = None
self.label = None

def extract_all_sentences(self, service):
def extract_sentiment_entities(self, service):
"""Extract the sentences in a document."""

if self.sentence_entity_pair is not None:
if self.sentiment_entity_pair is not None:
return self.sentence_entity_pair

docs = service.documents()
request_body = get_request_body(
self.text,
syntax=True,
syntax=False,
entities=True,
sentiment=False)
sentiment=True)
request = docs.annotateText(body=request_body)

ent_list = []

response = request.execute()
entities = response.get('entities', [])
sentences = response.get('sentences', [])

sent_list = [
sentence.get('text', {}).get('content') for sentence in sentences
]
documentSentiment = response.get('documentSentiment', {})

for entity in entities:
ent_type = entity.get('type')
Expand All @@ -120,9 +91,9 @@ def extract_all_sentences(self, service):
if ent_type == 'PERSON' and wiki_url is not None:
ent_list.append(wiki_url)

self.sentence_entity_pair = (sent_list, ent_list)
self.sentiment_entity_pair = (documentSentiment, ent_list)

return self.sentence_entity_pair
return self.sentiment_entity_pair


def to_sentiment_json(doc_id, sent, label):
Expand Down Expand Up @@ -200,18 +171,9 @@ def get_sentiment_entities(service, document):
"""

sentiments, entities = analyze_document(service, document)
score = sentiments.get('score')

sentiments = [sent for sent in sentiments if sent[0] is not None]
negative_sentiments = [
polarity for polarity, magnitude in sentiments if polarity < 0.0]
positive_sentiments = [
polarity for polarity, magnitude in sentiments if polarity > 0.0]

negative = sum(negative_sentiments)
positive = sum(positive_sentiments)
total = positive + negative

return (total, entities)
return (score, entities)


def get_sentiment_label(sentiment):
Expand Down Expand Up @@ -318,8 +280,12 @@ def get_service():
"""Build a client to the Google Cloud Natural Language API."""

credentials = GoogleCredentials.get_application_default()

return discovery.build('language', 'v1beta1',
scoped_credentials = credentials.create_scoped(
['https://www.googleapis.com/auth/cloud-platform'])
http = httplib2.Http()
scoped_credentials.authorize(http)
return discovery.build('language', 'v1',
http=http,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You should only need to specify credentials and not http, yeah?

credentials=credentials)


Expand Down
6 changes: 3 additions & 3 deletions language/movie_nl/main_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,18 +69,18 @@ def test_process_movie_reviews():
entities = [json.loads(entity) for entity in entities]

# assert sentiments
assert sentiments[0].get('sentiment') == 1.0
assert sentiments[0].get('sentiment') == 0.9
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can't these values change as the model changes? Should you just assert that sentiments are some non-None value?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, but they don't change that frequently, so the check above lets us see whether something has really failed. A non-None check would not let us detect an issue that the model or API might have.

assert sentiments[0].get('label') == 1

assert sentiments[1].get('sentiment') == 1.0
assert sentiments[1].get('sentiment') == 0.9
assert sentiments[1].get('label') == 1

# assert entities
assert len(entities) == 1
assert entities[0].get('name') == 'Tom Cruise'
assert (entities[0].get('wiki_url') ==
'http://en.wikipedia.org/wiki/Tom_Cruise')
assert entities[0].get('sentiment') == 2.0
assert entities[0].get('sentiment') == 1.8


def test_rank_positive_entities(capsys):
Expand Down
6 changes: 4 additions & 2 deletions language/ocr_nl/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,10 +115,12 @@ class TextAnalyzer(object):
def __init__(self, db_filename=None):
credentials = GoogleCredentials.get_application_default()
scoped_credentials = credentials.create_scoped(
['https://www.googleapis.com/auth/cloud-platform'])
['https://www.googleapis.com/auth/cloud-platform'])
http = httplib2.Http()
scoped_credentials.authorize(http)
self.service = discovery.build('language', 'v1beta1', http=http)
self.service = discovery.build('language', 'v1',
http=http,
credentials=credentials)

# This list will store the entity information gleaned from the
# image files.
Expand Down
20 changes: 16 additions & 4 deletions language/sentiment/sentiment_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def main(movie_review_filename):
'''Run a sentiment analysis request on text within a passed filename.'''

credentials = GoogleCredentials.get_application_default()
service = discovery.build('language', 'v1beta1', credentials=credentials)
service = discovery.build('language', 'v1', credentials=credentials)

with open(movie_review_filename, 'r') as review_file:
service_request = service.documents().analyzeSentiment(
Expand All @@ -35,11 +35,23 @@ def main(movie_review_filename):
)
response = service_request.execute()

polarity = response['documentSentiment']['polarity']
score = response['documentSentiment']['score']
magnitude = response['documentSentiment']['magnitude']

print('Sentiment: polarity of {} with magnitude of {}'.format(
polarity, magnitude))
for i, sentence in enumerate(response['sentences']):
sentence_sentiment = sentence['sentiment']['score']
print('Sentence {} has a sentiment score of {}'.format(
i,
sentence_sentiment))

print('Overall Sentiment: score of {} with magnitude of {}'.format(
score,
magnitude)
)
return 0

print('Sentiment: score of {} with magnitude of {}'.format(
score, magnitude))
return 0


Expand Down
14 changes: 7 additions & 7 deletions language/sentiment/sentiment_analysis_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,25 +18,25 @@
def test_pos(resource, capsys):
main(resource('pos.txt'))
out, err = capsys.readouterr()
polarity = float(re.search('polarity of (.+?) with', out).group(1))
score = float(re.search('score of (.+?) with', out).group(1))
magnitude = float(re.search('magnitude of (.+?)', out).group(1))
assert polarity * magnitude > 0
assert score * magnitude > 0


def test_neg(resource, capsys):
main(resource('neg.txt'))
out, err = capsys.readouterr()
polarity = float(re.search('polarity of (.+?) with', out).group(1))
score = float(re.search('score of (.+?) with', out).group(1))
magnitude = float(re.search('magnitude of (.+?)', out).group(1))
assert polarity * magnitude < 0
assert score * magnitude < 0


def test_mixed(resource, capsys):
main(resource('mixed.txt'))
out, err = capsys.readouterr()
polarity = float(re.search('polarity of (.+?) with', out).group(1))
assert polarity <= 0.3
assert polarity >= -0.3
score = float(re.search('score of (.+?) with', out).group(1))
assert score <= 0.3
assert score >= -0.3


def test_neutral(resource, capsys):
Expand Down