Fixes for text encoding (GoogleCloudPlatform/python-docs-samples#913)

* Fixes for non-ASCII encodings

* Adds test for UTF

* Style fix
gguuss authored and busunkim96 committed Sep 29, 2020
1 parent 24905ee commit 78f4bc9
Showing 3 changed files with 73 additions and 40 deletions.
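
The fix applies one guard, repeated at the top of every snippet that accepts free-form text: if the caller passed a byte string (what Python 2 hands you from argparse or file reads), decode it to unicode before building the document. A minimal sketch of that pattern, lifted from the diffs below (the standalone helper and its name are illustrative, not part of the commit):

    import six

    def ensure_unicode(text):
        # six.binary_type is str under Python 2 and bytes under Python 3.
        # Command-line arguments and file reads arrive as bytes on
        # Python 2, so decode (assuming UTF-8 input) before use.
        if isinstance(text, six.binary_type):
            text = text.decode('utf-8')
        return text

    print(ensure_unicode(b'caf\xc3\xa9') == u'caf\xe9')  # True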
File 1/3: snippets.py (v1 client)

@@ -24,12 +24,16 @@
 import argparse
 
 from google.cloud import language
+import six
 
 
 def sentiment_text(text):
     """Detects sentiment in the text."""
     language_client = language.Client()
 
+    if isinstance(text, six.binary_type):
+        text = text.decode('utf-8')
+
     # Instantiates a plain text document.
     document = language_client.document_from_text(text)
 
@@ -60,6 +64,9 @@ def entities_text(text):
     """Detects entities in the text."""
     language_client = language.Client()
 
+    if isinstance(text, six.binary_type):
+        text = text.decode('utf-8')
+
     # Instantiates a plain text document.
     document = language_client.document_from_text(text)
 
@@ -69,11 +76,11 @@ def entities_text(text):
 
     for entity in entities:
         print('=' * 20)
-        print('{:<16}: {}'.format('name', entity.name))
-        print('{:<16}: {}'.format('type', entity.entity_type))
-        print('{:<16}: {}'.format('metadata', entity.metadata))
-        print('{:<16}: {}'.format('salience', entity.salience))
-        print('{:<16}: {}'.format('wikipedia_url',
+        print(u'{:<16}: {}'.format('name', entity.name))
+        print(u'{:<16}: {}'.format('type', entity.entity_type))
+        print(u'{:<16}: {}'.format('metadata', entity.metadata))
+        print(u'{:<16}: {}'.format('salience', entity.salience))
+        print(u'{:<16}: {}'.format('wikipedia_url',
               entity.metadata.get('wikipedia_url', '-')))
 
 
Expand All @@ -90,18 +97,21 @@ def entities_file(gcs_uri):

for entity in entities:
print('=' * 20)
print('{:<16}: {}'.format('name', entity.name))
print('{:<16}: {}'.format('type', entity.entity_type))
print('{:<16}: {}'.format('metadata', entity.metadata))
print('{:<16}: {}'.format('salience', entity.salience))
print('{:<16}: {}'.format('wikipedia_url',
print(u'{:<16}: {}'.format('name', entity.name))
print(u'{:<16}: {}'.format('type', entity.entity_type))
print(u'{:<16}: {}'.format('metadata', entity.metadata))
print(u'{:<16}: {}'.format('salience', entity.salience))
print(u'{:<16}: {}'.format('wikipedia_url',
entity.metadata.get('wikipedia_url', '-')))


def syntax_text(text):
"""Detects syntax in the text."""
language_client = language.Client()

if isinstance(text, six.binary_type):
text = text.decode('utf-8')

# Instantiates a plain text document.
document = language_client.document_from_text(text)

@@ -110,7 +120,7 @@ def syntax_text(text):
     tokens = document.analyze_syntax().tokens
 
     for token in tokens:
-        print('{}: {}'.format(token.part_of_speech, token.text_content))
+        print(u'{}: {}'.format(token.part_of_speech, token.text_content))
 
 
 def syntax_file(gcs_uri):
@@ -125,7 +135,7 @@ def syntax_file(gcs_uri):
     tokens = document.analyze_syntax().tokens
 
    for token in tokens:
-        print('{}: {}'.format(token.part_of_speech, token.text_content))
+        print(u'{}: {}'.format(token.part_of_speech, token.text_content))
 
 
 if __name__ == '__main__':
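
The u'' prefixes above are not cosmetic. Under Python 2, str.format does not promote a byte-string template to unicode the way the % operator does, so formatting a non-ASCII entity name into a plain '...' template raises UnicodeEncodeError. A minimal repro, assuming a Python 2 interpreter:

    # Python 2 behaviour that motivates the u'' prefixes.
    name = u'Caf\xe9'  # u'Café', e.g. an entity name from the API

    print(u'{:<16}: {}'.format('name', name))  # fine: unicode template

    try:
        print('{:<16}: {}'.format('name', name))  # byte-string template
    except UnicodeEncodeError as err:
        # "'ascii' codec can't encode character u'\xe9' ..."
        print(u'failed: {}'.format(err))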
File 2/3: snippets.py (v1beta2 client)

@@ -27,21 +27,25 @@
 from google.cloud.gapic.language.v1beta2 import enums
 from google.cloud.gapic.language.v1beta2 import language_service_client
 from google.cloud.proto.language.v1beta2 import language_service_pb2
+import six
 
 
 def sentiment_text(text):
     """Detects sentiment in the text."""
     language_client = language.Client(api_version='v1beta2')
 
+    if isinstance(text, six.binary_type):
+        text = text.decode('utf-8')
+
     # Instantiates a plain text document.
     document = language_client.document_from_text(text)
 
     # Detects sentiment in the document. You can also analyze HTML with:
     #   document.doc_type == language.Document.HTML
     sentiment = document.analyze_sentiment().sentiment
 
-    print('Score: {}'.format(sentiment.score))
-    print('Magnitude: {}'.format(sentiment.magnitude))
+    print(u'Score: {}'.format(sentiment.score))
+    print(u'Magnitude: {}'.format(sentiment.magnitude))
 
 
 def sentiment_file(gcs_uri):
@@ -55,14 +59,17 @@ def sentiment_file(gcs_uri):
     #   document.doc_type == language.Document.HTML
     sentiment = document.analyze_sentiment().sentiment
 
-    print('Score: {}'.format(sentiment.score))
-    print('Magnitude: {}'.format(sentiment.magnitude))
+    print(u'Score: {}'.format(sentiment.score))
+    print(u'Magnitude: {}'.format(sentiment.magnitude))
 
 
 def entities_text(text):
     """Detects entities in the text."""
     language_client = language.Client(api_version='v1beta2')
 
+    if isinstance(text, six.binary_type):
+        text = text.decode('utf-8')
+
     # Instantiates a plain text document.
     document = language_client.document_from_text(text)
 
@@ -71,12 +78,12 @@ def entities_text(text):
     entities = document.analyze_entities().entities
 
     for entity in entities:
-        print('=' * 20)
-        print('{:<16}: {}'.format('name', entity.name))
-        print('{:<16}: {}'.format('type', entity.entity_type))
-        print('{:<16}: {}'.format('metadata', entity.metadata))
-        print('{:<16}: {}'.format('salience', entity.salience))
-        print('{:<16}: {}'.format('wikipedia_url',
+        print(u'=' * 20)
+        print(u'{:<16}: {}'.format('name', entity.name))
+        print(u'{:<16}: {}'.format('type', entity.entity_type))
+        print(u'{:<16}: {}'.format('metadata', entity.metadata))
+        print(u'{:<16}: {}'.format('salience', entity.salience))
+        print(u'{:<16}: {}'.format('wikipedia_url',
               entity.metadata.get('wikipedia_url', '-')))
 
 
@@ -105,6 +112,9 @@ def syntax_text(text):
     """Detects syntax in the text."""
     language_client = language.Client(api_version='v1beta2')
 
+    if isinstance(text, six.binary_type):
+        text = text.decode('utf-8')
+
     # Instantiates a plain text document.
     document = language_client.document_from_text(text)
 
@@ -113,7 +123,7 @@ def syntax_text(text):
     tokens = document.analyze_syntax().tokens
 
     for token in tokens:
-        print('{}: {}'.format(token.part_of_speech, token.text_content))
+        print(u'{}: {}'.format(token.part_of_speech, token.text_content))
 
 
 def syntax_file(gcs_uri):
@@ -128,14 +138,17 @@ def syntax_file(gcs_uri):
     tokens = document.analyze_syntax().tokens
 
     for token in tokens:
-        print('{}: {}'.format(token.part_of_speech, token.text_content))
+        print(u'{}: {}'.format(token.part_of_speech, token.text_content))
 
 
 def entity_sentiment_text(text):
     """Detects entity sentiment in the provided text."""
     language_client = language_service_client.LanguageServiceClient()
     document = language_service_pb2.Document()
 
+    if isinstance(text, six.binary_type):
+        text = text.decode('utf-8')
+
     document.content = text.encode('utf-8')
     document.type = enums.Document.Type.PLAIN_TEXT
 
@@ -144,15 +157,15 @@ def entity_sentiment_text(text):
 
     for entity in result.entities:
         print('Mentions: ')
-        print('Name: "{}"'.format(entity.name))
+        print(u'Name: "{}"'.format(entity.name))
         for mention in entity.mentions:
-            print('  Begin Offset : {}'.format(mention.text.begin_offset))
-            print('  Content : {}'.format(mention.text.content))
-            print('  Magnitude : {}'.format(mention.sentiment.magnitude))
-            print('  Sentiment : {}'.format(mention.sentiment.score))
-            print('  Type : {}'.format(mention.type))
-        print('Salience: {}'.format(entity.salience))
-        print('Sentiment: {}\n'.format(entity.sentiment))
+            print(u'  Begin Offset : {}'.format(mention.text.begin_offset))
+            print(u'  Content : {}'.format(mention.text.content))
+            print(u'  Magnitude : {}'.format(mention.sentiment.magnitude))
+            print(u'  Sentiment : {}'.format(mention.sentiment.score))
+            print(u'  Type : {}'.format(mention.type))
+        print(u'Salience: {}'.format(entity.salience))
+        print(u'Sentiment: {}\n'.format(entity.sentiment))
 
 
 def entity_sentiment_file(gcs_uri):
@@ -167,15 +180,15 @@ def entity_sentiment_file(gcs_uri):
         document, enums.EncodingType.UTF8)
 
     for entity in result.entities:
-        print('Name: "{}"'.format(entity.name))
+        print(u'Name: "{}"'.format(entity.name))
         for mention in entity.mentions:
-            print('  Begin Offset : {}'.format(mention.text.begin_offset))
-            print('  Content : {}'.format(mention.text.content))
-            print('  Magnitude : {}'.format(mention.sentiment.magnitude))
-            print('  Sentiment : {}'.format(mention.sentiment.score))
-            print('  Type : {}'.format(mention.type))
-        print('Salience: {}'.format(entity.salience))
-        print('Sentiment: {}\n'.format(entity.sentiment))
+            print(u'  Begin Offset : {}'.format(mention.text.begin_offset))
+            print(u'  Content : {}'.format(mention.text.content))
+            print(u'  Magnitude : {}'.format(mention.sentiment.magnitude))
+            print(u'  Sentiment : {}'.format(mention.sentiment.score))
+            print(u'  Type : {}'.format(mention.type))
+        print(u'Salience: {}'.format(entity.salience))
+        print(u'Sentiment: {}\n'.format(entity.sentiment))
 
 
 if __name__ == '__main__':
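
entity_sentiment_text is the one place that decodes and then immediately re-encodes: the diff shows the document content handed over as UTF-8 bytes, and enums.EncodingType.UTF8 tells the API which encoding to use when computing mention begin_offset values. The decode/encode pair simply normalizes both possible inputs to the same payload. A sketch of the round trip (the helper and its name are illustrative):

    import six

    def to_utf8_bytes(text):
        # Accept bytes or unicode; always hand the API UTF-8 bytes.
        if isinstance(text, six.binary_type):
            text = text.decode('utf-8')  # assumes UTF-8 input
        return text.encode('utf-8')

    # Both spellings of 'café' normalize to the same payload.
    assert to_utf8_bytes(u'caf\xe9') == to_utf8_bytes(b'caf\xc3\xa9')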
File 3/3: tests for snippets.py

@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 # Copyright 2017 Google, Inc.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -26,6 +27,15 @@ def test_sentiment_text(capsys):
     assert 'Score: 0' in out
 
 
+def test_sentiment_utf(capsys):
+    snippets.sentiment_text(
+        u'1er site d\'information. Les articles du journal et toute l\'' +
+        u'actualité en continu : International, France, Société, Economie, ' +
+        u'Culture, Environnement')
+    out, _ = capsys.readouterr()
+    assert 'Score: 0' in out
+
+
 def test_sentiment_file(capsys):
     snippets.sentiment_file(TEST_FILE_URL)
     out, _ = capsys.readouterr()
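
The new first line of the test file, # -*- coding: utf-8 -*-, is what allows the French string literal at all: PEP 263 requires an encoding declaration before non-ASCII bytes may appear in Python 2 source. A minimal illustration (file contents hypothetical, not part of the commit):

    # -*- coding: utf-8 -*-
    # Without the declaration above, Python 2 refuses to compile this
    # file ("SyntaxError: Non-ASCII character '\xc3' ... but no encoding
    # declared") before any test can run.
    s = u'actualité en continu'
    assert u'\xe9' in s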
