diff --git a/language/movie_nl/main.py b/language/movie_nl/main.py index ba5c63b60b9..d6ef5d169a5 100644 --- a/language/movie_nl/main.py +++ b/language/movie_nl/main.py @@ -21,6 +21,7 @@ from googleapiclient import discovery from googleapiclient.errors import HttpError +import httplib2 from oauth2client.client import GoogleCredentials import requests @@ -30,10 +31,7 @@ def analyze_document(service, document): the movie name.""" logging.info('Analyzing {}'.format(document.doc_id)) - sentences, entities = document.extract_all_sentences(service) - - sentiments = [get_sentiment(service, sentence) for sentence in sentences] - + sentiments, entities = document.extract_sentiment_entities(service) return sentiments, entities @@ -56,29 +54,6 @@ def get_request_body(text, syntax=True, entities=True, sentiment=True): return body -def get_sentiment(service, sentence): - """Get the sentence-level sentiment.""" - body = get_request_body( - sentence, syntax=False, entities=True, sentiment=True) - - docs = service.documents() - request = docs.annotateText(body=body) - - response = request.execute(num_retries=3) - - sentiment = response.get('documentSentiment') - - if sentiment is None: - return (None, None) - else: - pol = sentiment.get('polarity') - mag = sentiment.get('magnitude') - - if pol is None and mag is not None: - pol = 0 - return (pol, mag) - - class Document(object): """Document class captures a single document of movie reviews.""" @@ -86,32 +61,28 @@ def __init__(self, text, doc_id, doc_path): self.text = text self.doc_id = doc_id self.doc_path = doc_path - self.sentence_entity_pair = None + self.sentiment_entity_pair = None self.label = None - def extract_all_sentences(self, service): + def extract_sentiment_entities(self, service): - """Extract the sentences in a document.""" + """Extract the document sentiment and entities.""" - if self.sentence_entity_pair is not None: + if self.sentiment_entity_pair is not None: - return self.sentence_entity_pair + return self.sentiment_entity_pair docs = service.documents() request_body = get_request_body( self.text,
- syntax=True, + syntax=False, entities=True, - sentiment=False) + sentiment=True) request = docs.annotateText(body=request_body) ent_list = [] response = request.execute() entities = response.get('entities', []) - sentences = response.get('sentences', []) - - sent_list = [ - sentence.get('text', {}).get('content') for sentence in sentences - ] + documentSentiment = response.get('documentSentiment', {}) for entity in entities: ent_type = entity.get('type') @@ -120,9 +91,9 @@ def extract_all_sentences(self, service): if ent_type == 'PERSON' and wiki_url is not None: ent_list.append(wiki_url) - self.sentence_entity_pair = (sent_list, ent_list) + self.sentiment_entity_pair = (documentSentiment, ent_list) - return self.sentence_entity_pair + return self.sentiment_entity_pair def to_sentiment_json(doc_id, sent, label): @@ -200,18 +171,9 @@ def get_sentiment_entities(service, document): """ sentiments, entities = analyze_document(service, document) + score = sentiments.get('score') - sentiments = [sent for sent in sentiments if sent[0] is not None] - negative_sentiments = [ - polarity for polarity, magnitude in sentiments if polarity < 0.0] - positive_sentiments = [ - polarity for polarity, magnitude in sentiments if polarity > 0.0] - - negative = sum(negative_sentiments) - positive = sum(positive_sentiments) - total = positive + negative - - return (total, entities) + return (score, entities) def get_sentiment_label(sentiment): @@ -318,8 +280,11 @@ def get_service(): """Build a client to the Google Cloud Natural Language API.""" credentials = GoogleCredentials.get_application_default() - - return discovery.build('language', 'v1beta1', + scoped_credentials = credentials.create_scoped( + ['https://www.googleapis.com/auth/cloud-platform']) + http = httplib2.Http() + scoped_credentials.authorize(http) + return discovery.build('language', 'v1', + http=http) - credentials=credentials) diff --git a/language/movie_nl/main_test.py b/language/movie_nl/main_test.py index
8e22a1da34e..74c62eb382a 100644 --- a/language/movie_nl/main_test.py +++ b/language/movie_nl/main_test.py @@ -69,10 +69,10 @@ def test_process_movie_reviews(): entities = [json.loads(entity) for entity in entities] # assert sentiments - assert sentiments[0].get('sentiment') == 1.0 + assert sentiments[0].get('sentiment') == 0.9 assert sentiments[0].get('label') == 1 - assert sentiments[1].get('sentiment') == 1.0 + assert sentiments[1].get('sentiment') == 0.9 assert sentiments[1].get('label') == 1 # assert entities @@ -80,7 +80,7 @@ def test_process_movie_reviews(): assert entities[0].get('name') == 'Tom Cruise' assert (entities[0].get('wiki_url') == 'http://en.wikipedia.org/wiki/Tom_Cruise') - assert entities[0].get('sentiment') == 2.0 + assert entities[0].get('sentiment') == 1.8 def test_rank_positive_entities(capsys): diff --git a/language/ocr_nl/main.py b/language/ocr_nl/main.py index 6e329f53386..03fbdf9d458 100755 --- a/language/ocr_nl/main.py +++ b/language/ocr_nl/main.py @@ -115,10 +115,10 @@ class TextAnalyzer(object): def __init__(self, db_filename=None): credentials = GoogleCredentials.get_application_default() scoped_credentials = credentials.create_scoped( - ['https://www.googleapis.com/auth/cloud-platform']) + ['https://www.googleapis.com/auth/cloud-platform']) http = httplib2.Http() scoped_credentials.authorize(http) - self.service = discovery.build('language', 'v1beta1', http=http) + self.service = discovery.build('language', 'v1', http=http) # This list will store the entity information gleaned from the # image files.
diff --git a/language/sentiment/sentiment_analysis.py b/language/sentiment/sentiment_analysis.py index 8e250881305..31a8b88c0c0 100644 --- a/language/sentiment/sentiment_analysis.py +++ b/language/sentiment/sentiment_analysis.py @@ -22,7 +22,7 @@ def main(movie_review_filename): '''Run a sentiment analysis request on text within a passed filename.''' credentials = GoogleCredentials.get_application_default() - service = discovery.build('language', 'v1beta1', credentials=credentials) + service = discovery.build('language', 'v1', credentials=credentials) with open(movie_review_filename, 'r') as review_file: service_request = service.documents().analyzeSentiment( @@ -35,11 +35,19 @@ def main(movie_review_filename): ) response = service_request.execute() - polarity = response['documentSentiment']['polarity'] + score = response['documentSentiment']['score'] magnitude = response['documentSentiment']['magnitude'] - print('Sentiment: polarity of {} with magnitude of {}'.format( - polarity, magnitude)) + for i, sentence in enumerate(response['sentences']): + sentence_sentiment = sentence['sentiment']['score'] + print('Sentence {} has a sentiment score of {}'.format( + i, + sentence_sentiment)) + + print('Overall Sentiment: score of {} with magnitude of {}'.format( + score, + magnitude) + ) return 0 diff --git a/language/sentiment/sentiment_analysis_test.py b/language/sentiment/sentiment_analysis_test.py index d6b6a7abfea..ff28211944e 100644 --- a/language/sentiment/sentiment_analysis_test.py +++ b/language/sentiment/sentiment_analysis_test.py @@ -18,25 +18,25 @@ def test_pos(resource, capsys): main(resource('pos.txt')) out, err = capsys.readouterr() - polarity = float(re.search('polarity of (.+?) with', out).group(1)) + score = float(re.search('score of (.+?)
with', out).group(1)) - magnitude = float(re.search('magnitude of (.+?)', out).group(1)) + magnitude = float(re.search('magnitude of (.+)', out).group(1)) - assert polarity * magnitude > 0 + assert score * magnitude > 0 def test_neg(resource, capsys): main(resource('neg.txt')) out, err = capsys.readouterr() - polarity = float(re.search('polarity of (.+?) with', out).group(1)) + score = float(re.search('score of (.+?) with', out).group(1)) - magnitude = float(re.search('magnitude of (.+?)', out).group(1)) + magnitude = float(re.search('magnitude of (.+)', out).group(1)) - assert polarity * magnitude < 0 + assert score * magnitude < 0 def test_mixed(resource, capsys): main(resource('mixed.txt')) out, err = capsys.readouterr() - polarity = float(re.search('polarity of (.+?) with', out).group(1)) - assert polarity <= 0.3 - assert polarity >= -0.3 + score = float(re.search('score of (.+?) with', out).group(1)) + assert score <= 0.3 + assert score >= -0.3 def test_neutral(resource, capsys):