Skip to content

Commit aa7f3a8

Browse files
puneith authored and Jon Wayne Parrott committed
Movie sample changes
1 parent a236a5a commit aa7f3a8

File tree

5 files changed

+48
-68
lines changed

5 files changed

+48
-68
lines changed

language/movie_nl/main.py

Lines changed: 18 additions & 52 deletions
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,7 @@
2121

2222
from googleapiclient import discovery
2323
from googleapiclient.errors import HttpError
24+
import httplib2
2425
from oauth2client.client import GoogleCredentials
2526
import requests
2627

@@ -30,10 +31,7 @@ def analyze_document(service, document):
3031
the movie name."""
3132
logging.info('Analyzing {}'.format(document.doc_id))
3233

33-
sentences, entities = document.extract_all_sentences(service)
34-
35-
sentiments = [get_sentiment(service, sentence) for sentence in sentences]
36-
34+
sentiments, entities = document.extract_sentiment_entities(service)
3735
return sentiments, entities
3836

3937

@@ -56,62 +54,35 @@ def get_request_body(text, syntax=True, entities=True, sentiment=True):
5654
return body
5755

5856

59-
def get_sentiment(service, sentence):
60-
"""Get the sentence-level sentiment."""
61-
body = get_request_body(
62-
sentence, syntax=False, entities=True, sentiment=True)
63-
64-
docs = service.documents()
65-
request = docs.annotateText(body=body)
66-
67-
response = request.execute(num_retries=3)
68-
69-
sentiment = response.get('documentSentiment')
70-
71-
if sentiment is None:
72-
return (None, None)
73-
else:
74-
pol = sentiment.get('polarity')
75-
mag = sentiment.get('magnitude')
76-
77-
if pol is None and mag is not None:
78-
pol = 0
79-
return (pol, mag)
80-
81-
8257
class Document(object):
8358
"""Document class captures a single document of movie reviews."""
8459

8560
def __init__(self, text, doc_id, doc_path):
8661
self.text = text
8762
self.doc_id = doc_id
8863
self.doc_path = doc_path
89-
self.sentence_entity_pair = None
64+
self.sentiment_entity_pair = None
9065
self.label = None
9166

92-
def extract_all_sentences(self, service):
67+
def extract_sentiment_entities(self, service):
9368
"""Extract the sentences in a document."""
9469

95-
if self.sentence_entity_pair is not None:
70+
if self.sentiment_entity_pair is not None:
9671
return self.sentence_entity_pair
9772

9873
docs = service.documents()
9974
request_body = get_request_body(
10075
self.text,
101-
syntax=True,
76+
syntax=False,
10277
entities=True,
103-
sentiment=False)
78+
sentiment=True)
10479
request = docs.annotateText(body=request_body)
10580

10681
ent_list = []
10782

10883
response = request.execute()
10984
entities = response.get('entities', [])
110-
sentences = response.get('sentences', [])
111-
112-
sent_list = [
113-
sentence.get('text', {}).get('content') for sentence in sentences
114-
]
85+
documentSentiment = response.get('documentSentiment', {})
11586

11687
for entity in entities:
11788
ent_type = entity.get('type')
@@ -120,9 +91,9 @@ def extract_all_sentences(self, service):
12091
if ent_type == 'PERSON' and wiki_url is not None:
12192
ent_list.append(wiki_url)
12293

123-
self.sentence_entity_pair = (sent_list, ent_list)
94+
self.sentiment_entity_pair = (documentSentiment, ent_list)
12495

125-
return self.sentence_entity_pair
96+
return self.sentiment_entity_pair
12697

12798

12899
def to_sentiment_json(doc_id, sent, label):
@@ -200,18 +171,9 @@ def get_sentiment_entities(service, document):
200171
"""
201172

202173
sentiments, entities = analyze_document(service, document)
174+
score = sentiments.get('score')
203175

204-
sentiments = [sent for sent in sentiments if sent[0] is not None]
205-
negative_sentiments = [
206-
polarity for polarity, magnitude in sentiments if polarity < 0.0]
207-
positive_sentiments = [
208-
polarity for polarity, magnitude in sentiments if polarity > 0.0]
209-
210-
negative = sum(negative_sentiments)
211-
positive = sum(positive_sentiments)
212-
total = positive + negative
213-
214-
return (total, entities)
176+
return (score, entities)
215177

216178

217179
def get_sentiment_label(sentiment):
@@ -318,8 +280,12 @@ def get_service():
318280
"""Build a client to the Google Cloud Natural Language API."""
319281

320282
credentials = GoogleCredentials.get_application_default()
321-
322-
return discovery.build('language', 'v1beta1',
283+
scoped_credentials = credentials.create_scoped(
284+
['https://www.googleapis.com/auth/cloud-platform'])
285+
http = httplib2.Http()
286+
scoped_credentials.authorize(http)
287+
return discovery.build('language', 'v1',
288+
http=http,
323289
credentials=credentials)
324290

325291

language/movie_nl/main_test.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -69,18 +69,18 @@ def test_process_movie_reviews():
6969
entities = [json.loads(entity) for entity in entities]
7070

7171
# assert sentiments
72-
assert sentiments[0].get('sentiment') == 1.0
72+
assert sentiments[0].get('sentiment') == 0.9
7373
assert sentiments[0].get('label') == 1
7474

75-
assert sentiments[1].get('sentiment') == 1.0
75+
assert sentiments[1].get('sentiment') == 0.9
7676
assert sentiments[1].get('label') == 1
7777

7878
# assert entities
7979
assert len(entities) == 1
8080
assert entities[0].get('name') == 'Tom Cruise'
8181
assert (entities[0].get('wiki_url') ==
8282
'http://en.wikipedia.org/wiki/Tom_Cruise')
83-
assert entities[0].get('sentiment') == 2.0
83+
assert entities[0].get('sentiment') == 1.8
8484

8585

8686
def test_rank_positive_entities(capsys):

language/ocr_nl/main.py

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -115,10 +115,12 @@ class TextAnalyzer(object):
115115
def __init__(self, db_filename=None):
116116
credentials = GoogleCredentials.get_application_default()
117117
scoped_credentials = credentials.create_scoped(
118-
['https://www.googleapis.com/auth/cloud-platform'])
118+
['https://www.googleapis.com/auth/cloud-platform'])
119119
http = httplib2.Http()
120120
scoped_credentials.authorize(http)
121-
self.service = discovery.build('language', 'v1beta1', http=http)
121+
self.service = discovery.build('language', 'v1',
122+
http=http,
123+
credentials=credentials)
122124

123125
# This list will store the entity information gleaned from the
124126
# image files.

language/sentiment/sentiment_analysis.py

Lines changed: 16 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,7 @@ def main(movie_review_filename):
2222
'''Run a sentiment analysis request on text within a passed filename.'''
2323

2424
credentials = GoogleCredentials.get_application_default()
25-
service = discovery.build('language', 'v1beta1', credentials=credentials)
25+
service = discovery.build('language', 'v1', credentials=credentials)
2626

2727
with open(movie_review_filename, 'r') as review_file:
2828
service_request = service.documents().analyzeSentiment(
@@ -35,11 +35,23 @@ def main(movie_review_filename):
3535
)
3636
response = service_request.execute()
3737

38-
polarity = response['documentSentiment']['polarity']
38+
score = response['documentSentiment']['score']
3939
magnitude = response['documentSentiment']['magnitude']
4040

41-
print('Sentiment: polarity of {} with magnitude of {}'.format(
42-
polarity, magnitude))
41+
for i, sentence in enumerate(response['sentences']):
42+
sentence_sentiment = sentence['sentiment']['score']
43+
print('Sentence {} has a sentiment score of {}'.format(
44+
i,
45+
sentence_sentiment))
46+
47+
print('Overall Sentiment: score of {} with magnitude of {}'.format(
48+
score,
49+
magnitude)
50+
)
51+
return 0
52+
53+
print('Sentiment: score of {} with magnitude of {}'.format(
54+
score, magnitude))
4355
return 0
4456

4557

language/sentiment/sentiment_analysis_test.py

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -18,25 +18,25 @@
1818
def test_pos(resource, capsys):
1919
main(resource('pos.txt'))
2020
out, err = capsys.readouterr()
21-
polarity = float(re.search('polarity of (.+?) with', out).group(1))
21+
score = float(re.search('score of (.+?) with', out).group(1))
2222
magnitude = float(re.search('magnitude of (.+?)', out).group(1))
23-
assert polarity * magnitude > 0
23+
assert score * magnitude > 0
2424

2525

2626
def test_neg(resource, capsys):
2727
main(resource('neg.txt'))
2828
out, err = capsys.readouterr()
29-
polarity = float(re.search('polarity of (.+?) with', out).group(1))
29+
score = float(re.search('score of (.+?) with', out).group(1))
3030
magnitude = float(re.search('magnitude of (.+?)', out).group(1))
31-
assert polarity * magnitude < 0
31+
assert score * magnitude < 0
3232

3333

3434
def test_mixed(resource, capsys):
3535
main(resource('mixed.txt'))
3636
out, err = capsys.readouterr()
37-
polarity = float(re.search('polarity of (.+?) with', out).group(1))
38-
assert polarity <= 0.3
39-
assert polarity >= -0.3
37+
score = float(re.search('score of (.+?) with', out).group(1))
38+
assert score <= 0.3
39+
assert score >= -0.3
4040

4141

4242
def test_neutral(resource, capsys):

0 commit comments

Comments
 (0)