-
Notifications
You must be signed in to change notification settings - Fork 6.5k
Movie sample changes (in progress) - DO NOT MERGE before Nov 15. #597
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 4 commits
1dd8560
2753f2f
1e8f574
1a61c15
f9aa66c
3eb8497
5287e47
c3f44ab
884f431
531b256
0df605c
6d25d40
9b5c64b
affaf19
0c60cb5
a63ab55
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -21,19 +21,20 @@ | |
|
||
from googleapiclient import discovery | ||
from googleapiclient.errors import HttpError | ||
import httplib2 | ||
from oauth2client.client import GoogleCredentials | ||
import requests | ||
|
||
# TODO REMOVE - when discovery is public | ||
DISCOVERY_URL = ('https://language.googleapis.com/$discovery/rest?' | ||
'version=v1&labels=GOOGLE_INTERNAL') | ||
|
||
def analyze_document(service, document): | ||
"""Analyze the document and get the distribution of sentiments and | ||
the movie name.""" | ||
logging.info('Analyzing {}'.format(document.doc_id)) | ||
|
||
sentences, entities = document.extract_all_sentences(service) | ||
|
||
sentiments = [get_sentiment(service, sentence) for sentence in sentences] | ||
|
||
sentiments, entities = document.extract_sentiment_entities(service) | ||
return sentiments, entities | ||
|
||
|
||
|
@@ -56,62 +57,35 @@ def get_request_body(text, syntax=True, entities=True, sentiment=True): | |
return body | ||
|
||
|
||
def get_sentiment(service, sentence): | ||
"""Get the sentence-level sentiment.""" | ||
body = get_request_body( | ||
sentence, syntax=False, entities=True, sentiment=True) | ||
|
||
docs = service.documents() | ||
request = docs.annotateText(body=body) | ||
|
||
response = request.execute(num_retries=3) | ||
|
||
sentiment = response.get('documentSentiment') | ||
|
||
if sentiment is None: | ||
return (None, None) | ||
else: | ||
pol = sentiment.get('polarity') | ||
mag = sentiment.get('magnitude') | ||
|
||
if pol is None and mag is not None: | ||
pol = 0 | ||
return (pol, mag) | ||
|
||
|
||
class Document(object): | ||
"""Document class captures a single document of movie reviews.""" | ||
|
||
def __init__(self, text, doc_id, doc_path): | ||
self.text = text | ||
self.doc_id = doc_id | ||
self.doc_path = doc_path | ||
self.sentence_entity_pair = None | ||
self.sentiment_entity_pair = None | ||
self.label = None | ||
|
||
def extract_all_sentences(self, service): | ||
def extract_sentiment_entities(self, service): | ||
"""Extract the sentences in a document.""" | ||
|
||
if self.sentence_entity_pair is not None: | ||
if self.sentiment_entity_pair is not None: | ||
return self.sentence_entity_pair | ||
|
||
docs = service.documents() | ||
request_body = get_request_body( | ||
self.text, | ||
syntax=True, | ||
syntax=False, | ||
entities=True, | ||
sentiment=False) | ||
sentiment=True) | ||
request = docs.annotateText(body=request_body) | ||
|
||
ent_list = [] | ||
|
||
response = request.execute() | ||
entities = response.get('entities', []) | ||
sentences = response.get('sentences', []) | ||
|
||
sent_list = [ | ||
sentence.get('text', {}).get('content') for sentence in sentences | ||
] | ||
documentSentiment = response.get('documentSentiment', {}) | ||
|
||
for entity in entities: | ||
ent_type = entity.get('type') | ||
|
@@ -120,9 +94,9 @@ def extract_all_sentences(self, service): | |
if ent_type == 'PERSON' and wiki_url is not None: | ||
ent_list.append(wiki_url) | ||
|
||
self.sentence_entity_pair = (sent_list, ent_list) | ||
self.sentiment_entity_pair = (documentSentiment, ent_list) | ||
|
||
return self.sentence_entity_pair | ||
return self.sentiment_entity_pair | ||
|
||
|
||
def to_sentiment_json(doc_id, sent, label): | ||
|
@@ -200,18 +174,9 @@ def get_sentiment_entities(service, document): | |
""" | ||
|
||
sentiments, entities = analyze_document(service, document) | ||
score = sentiments.get('score') | ||
|
||
sentiments = [sent for sent in sentiments if sent[0] is not None] | ||
negative_sentiments = [ | ||
polarity for polarity, magnitude in sentiments if polarity < 0.0] | ||
positive_sentiments = [ | ||
polarity for polarity, magnitude in sentiments if polarity > 0.0] | ||
|
||
negative = sum(negative_sentiments) | ||
positive = sum(positive_sentiments) | ||
total = positive + negative | ||
|
||
return (total, entities) | ||
return (score, entities) | ||
|
||
|
||
def get_sentiment_label(sentiment): | ||
|
@@ -315,12 +280,16 @@ def rank_entities(reader, sentiment=None, topn=None, reverse_bool=False): | |
|
||
|
||
def get_service(): | ||
"""Build a client to the Google Cloud Natural Language API.""" | ||
""""Build a client to the Google Cloud Natural Language API.""" | ||
|
||
credentials = GoogleCredentials.get_application_default() | ||
|
||
return discovery.build('language', 'v1beta1', | ||
credentials=credentials) | ||
scoped_credentials = credentials.create_scoped( | ||
['https://www.googleapis.com/auth/cloud-platform']) | ||
http = httplib2.Http() | ||
scoped_credentials.authorize(http) | ||
return discovery.build('language', 'v1', | ||
http=http, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. You should only need to specify |
||
discoveryServiceUrl=DISCOVERY_URL) | ||
|
||
|
||
def analyze(input_dir, sentiment_writer, entity_writer, sample, log_file): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -69,18 +69,18 @@ def test_process_movie_reviews(): | |
entities = [json.loads(entity) for entity in entities] | ||
|
||
# assert sentiments | ||
assert sentiments[0].get('sentiment') == 1.0 | ||
assert sentiments[0].get('sentiment') == 0.9 | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can't these values change as the model changes? Should you just assert that the sentiments are some non-None value? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, but they don't change that frequently, so the check above lets us see whether something really failed. A non-None check would not allow us to detect an issue that the model or API might have. |
||
assert sentiments[0].get('label') == 1 | ||
|
||
assert sentiments[1].get('sentiment') == 1.0 | ||
assert sentiments[1].get('sentiment') == 0.8 | ||
assert sentiments[1].get('label') == 1 | ||
|
||
# assert entities | ||
assert len(entities) == 1 | ||
assert entities[0].get('name') == 'Tom Cruise' | ||
assert (entities[0].get('wiki_url') == | ||
'http://en.wikipedia.org/wiki/Tom_Cruise') | ||
assert entities[0].get('sentiment') == 2.0 | ||
assert entities[0].get('sentiment') == 1.7 | ||
|
||
|
||
def test_rank_positive_entities(capsys): | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
extra "
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done