Fixes for text encoding (GoogleCloudPlatform/python-docs-samples#913)

* Fixes for non-ASCII encodings

* Adds test for UTF

* Style fix
gguuss authored and busunkim96 committed Sep 29, 2020
1 parent 24905ee commit 78f4bc9
Showing 3 changed files with 73 additions and 40 deletions.
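
The fix applies one guard, repeated at the top of every snippet that accepts free-form text: if the caller passed a byte string (what Python 2 hands you from argparse or file reads), decode it to unicode before building the document. A minimal sketch of that pattern, lifted from the diffs below (the standalone helper and its name are illustrative, not part of the commit):

    import six

    def ensure_unicode(text):
        # six.binary_type is str under Python 2 and bytes under Python 3.
        # Command-line arguments and file reads arrive as bytes on
        # Python 2, so decode (assuming UTF-8 input) before use.
        if isinstance(text, six.binary_type):
            text = text.decode('utf-8')
        return text

    print(ensure_unicode(b'caf\xc3\xa9') == u'caf\xe9')  # True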
File 1/3: snippets.py (v1 client)

@@ -24,12 +24,16 @@
 import argparse
 
 from google.cloud import language
+import six
 
 
 def sentiment_text(text):
     """Detects sentiment in the text."""
     language_client = language.Client()
 
+    if isinstance(text, six.binary_type):
+        text = text.decode('utf-8')
+
     # Instantiates a plain text document.
     document = language_client.document_from_text(text)
 
@@ -60,6 +64,9 @@ def entities_text(text):
     """Detects entities in the text."""
     language_client = language.Client()
 
+    if isinstance(text, six.binary_type):
+        text = text.decode('utf-8')
+
     # Instantiates a plain text document.
     document = language_client.document_from_text(text)
 
@@ -69,11 +76,11 @@ def entities_text(text):
 
     for entity in entities:
         print('=' * 20)
-        print('{:<16}: {}'.format('name', entity.name))
-        print('{:<16}: {}'.format('type', entity.entity_type))
-        print('{:<16}: {}'.format('metadata', entity.metadata))
-        print('{:<16}: {}'.format('salience', entity.salience))
-        print('{:<16}: {}'.format('wikipedia_url',
+        print(u'{:<16}: {}'.format('name', entity.name))
+        print(u'{:<16}: {}'.format('type', entity.entity_type))
+        print(u'{:<16}: {}'.format('metadata', entity.metadata))
+        print(u'{:<16}: {}'.format('salience', entity.salience))
+        print(u'{:<16}: {}'.format('wikipedia_url',
               entity.metadata.get('wikipedia_url', '-')))
 
 
Expand All @@ -90,18 +97,21 @@ def entities_file(gcs_uri):

for entity in entities:
print('=' * 20)
print('{:<16}: {}'.format('name', entity.name))
print('{:<16}: {}'.format('type', entity.entity_type))
print('{:<16}: {}'.format('metadata', entity.metadata))
print('{:<16}: {}'.format('salience', entity.salience))
print('{:<16}: {}'.format('wikipedia_url',
print(u'{:<16}: {}'.format('name', entity.name))
print(u'{:<16}: {}'.format('type', entity.entity_type))
print(u'{:<16}: {}'.format('metadata', entity.metadata))
print(u'{:<16}: {}'.format('salience', entity.salience))
print(u'{:<16}: {}'.format('wikipedia_url',
entity.metadata.get('wikipedia_url', '-')))


def syntax_text(text):
"""Detects syntax in the text."""
language_client = language.Client()

if isinstance(text, six.binary_type):
text = text.decode('utf-8')

# Instantiates a plain text document.
document = language_client.document_from_text(text)

@@ -110,7 +120,7 @@ def syntax_text(text):
     tokens = document.analyze_syntax().tokens
 
     for token in tokens:
-        print('{}: {}'.format(token.part_of_speech, token.text_content))
+        print(u'{}: {}'.format(token.part_of_speech, token.text_content))
 
 
 def syntax_file(gcs_uri):
@@ -125,7 +135,7 @@ def syntax_file(gcs_uri):
     tokens = document.analyze_syntax().tokens
 
    for token in tokens:
-        print('{}: {}'.format(token.part_of_speech, token.text_content))
+        print(u'{}: {}'.format(token.part_of_speech, token.text_content))
 
 
 if __name__ == '__main__':
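
The u'' prefixes above are not cosmetic. Under Python 2, str.format does not promote a byte-string template to unicode the way the % operator does, so formatting a non-ASCII entity name into a plain '...' template raises UnicodeEncodeError. A minimal repro, assuming a Python 2 interpreter:

    # Python 2 behaviour that motivates the u'' prefixes.
    name = u'Caf\xe9'  # u'Café', e.g. an entity name from the API

    print(u'{:<16}: {}'.format('name', name))  # fine: unicode template

    try:
        print('{:<16}: {}'.format('name', name))  # byte-string template
    except UnicodeEncodeError as err:
        # "'ascii' codec can't encode character u'\xe9' ..."
        print(u'failed: {}'.format(err))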
File 2/3: snippets.py (v1beta2 client)

@@ -27,21 +27,25 @@
 from google.cloud.gapic.language.v1beta2 import enums
 from google.cloud.gapic.language.v1beta2 import language_service_client
 from google.cloud.proto.language.v1beta2 import language_service_pb2
+import six
 
 
 def sentiment_text(text):
     """Detects sentiment in the text."""
     language_client = language.Client(api_version='v1beta2')
 
+    if isinstance(text, six.binary_type):
+        text = text.decode('utf-8')
+
     # Instantiates a plain text document.
     document = language_client.document_from_text(text)
 
     # Detects sentiment in the document. You can also analyze HTML with:
     #   document.doc_type == language.Document.HTML
     sentiment = document.analyze_sentiment().sentiment
 
-    print('Score: {}'.format(sentiment.score))
-    print('Magnitude: {}'.format(sentiment.magnitude))
+    print(u'Score: {}'.format(sentiment.score))
+    print(u'Magnitude: {}'.format(sentiment.magnitude))
 
 
 def sentiment_file(gcs_uri):
@@ -55,14 +59,17 @@ def sentiment_file(gcs_uri):
     #   document.doc_type == language.Document.HTML
     sentiment = document.analyze_sentiment().sentiment
 
-    print('Score: {}'.format(sentiment.score))
-    print('Magnitude: {}'.format(sentiment.magnitude))
+    print(u'Score: {}'.format(sentiment.score))
+    print(u'Magnitude: {}'.format(sentiment.magnitude))
 
 
 def entities_text(text):
     """Detects entities in the text."""
     language_client = language.Client(api_version='v1beta2')
 
+    if isinstance(text, six.binary_type):
+        text = text.decode('utf-8')
+
     # Instantiates a plain text document.
     document = language_client.document_from_text(text)
 
@@ -71,12 +78,12 @@ def entities_text(text):
     entities = document.analyze_entities().entities
 
     for entity in entities:
-        print('=' * 20)
-        print('{:<16}: {}'.format('name', entity.name))
-        print('{:<16}: {}'.format('type', entity.entity_type))
-        print('{:<16}: {}'.format('metadata', entity.metadata))
-        print('{:<16}: {}'.format('salience', entity.salience))
-        print('{:<16}: {}'.format('wikipedia_url',
+        print(u'=' * 20)
+        print(u'{:<16}: {}'.format('name', entity.name))
+        print(u'{:<16}: {}'.format('type', entity.entity_type))
+        print(u'{:<16}: {}'.format('metadata', entity.metadata))
+        print(u'{:<16}: {}'.format('salience', entity.salience))
+        print(u'{:<16}: {}'.format('wikipedia_url',
               entity.metadata.get('wikipedia_url', '-')))
 
 
@@ -105,6 +112,9 @@ def syntax_text(text):
     """Detects syntax in the text."""
     language_client = language.Client(api_version='v1beta2')
 
+    if isinstance(text, six.binary_type):
+        text = text.decode('utf-8')
+
     # Instantiates a plain text document.
     document = language_client.document_from_text(text)
 
@@ -113,7 +123,7 @@ def syntax_text(text):
     tokens = document.analyze_syntax().tokens
 
     for token in tokens:
-        print('{}: {}'.format(token.part_of_speech, token.text_content))
+        print(u'{}: {}'.format(token.part_of_speech, token.text_content))
 
 
 def syntax_file(gcs_uri):
@@ -128,14 +138,17 @@ def syntax_file(gcs_uri):
     tokens = document.analyze_syntax().tokens
 
     for token in tokens:
-        print('{}: {}'.format(token.part_of_speech, token.text_content))
+        print(u'{}: {}'.format(token.part_of_speech, token.text_content))
 
 
 def entity_sentiment_text(text):
     """Detects entity sentiment in the provided text."""
     language_client = language_service_client.LanguageServiceClient()
     document = language_service_pb2.Document()
 
+    if isinstance(text, six.binary_type):
+        text = text.decode('utf-8')
+
     document.content = text.encode('utf-8')
     document.type = enums.Document.Type.PLAIN_TEXT
 
@@ -144,15 +157,15 @@ def entity_sentiment_text(text):
 
     for entity in result.entities:
         print('Mentions: ')
-        print('Name: "{}"'.format(entity.name))
+        print(u'Name: "{}"'.format(entity.name))
         for mention in entity.mentions:
-            print('  Begin Offset : {}'.format(mention.text.begin_offset))
-            print('  Content : {}'.format(mention.text.content))
-            print('  Magnitude : {}'.format(mention.sentiment.magnitude))
-            print('  Sentiment : {}'.format(mention.sentiment.score))
-            print('  Type : {}'.format(mention.type))
-        print('Salience: {}'.format(entity.salience))
-        print('Sentiment: {}\n'.format(entity.sentiment))
+            print(u'  Begin Offset : {}'.format(mention.text.begin_offset))
+            print(u'  Content : {}'.format(mention.text.content))
+            print(u'  Magnitude : {}'.format(mention.sentiment.magnitude))
+            print(u'  Sentiment : {}'.format(mention.sentiment.score))
+            print(u'  Type : {}'.format(mention.type))
+        print(u'Salience: {}'.format(entity.salience))
+        print(u'Sentiment: {}\n'.format(entity.sentiment))
 
 
 def entity_sentiment_file(gcs_uri):
@@ -167,15 +180,15 @@ def entity_sentiment_file(gcs_uri):
         document, enums.EncodingType.UTF8)
 
     for entity in result.entities:
-        print('Name: "{}"'.format(entity.name))
+        print(u'Name: "{}"'.format(entity.name))
         for mention in entity.mentions:
-            print('  Begin Offset : {}'.format(mention.text.begin_offset))
-            print('  Content : {}'.format(mention.text.content))
-            print('  Magnitude : {}'.format(mention.sentiment.magnitude))
-            print('  Sentiment : {}'.format(mention.sentiment.score))
-            print('  Type : {}'.format(mention.type))
-        print('Salience: {}'.format(entity.salience))
-        print('Sentiment: {}\n'.format(entity.sentiment))
+            print(u'  Begin Offset : {}'.format(mention.text.begin_offset))
+            print(u'  Content : {}'.format(mention.text.content))
+            print(u'  Magnitude : {}'.format(mention.sentiment.magnitude))
+            print(u'  Sentiment : {}'.format(mention.sentiment.score))
+            print(u'  Type : {}'.format(mention.type))
+        print(u'Salience: {}'.format(entity.salience))
+        print(u'Sentiment: {}\n'.format(entity.sentiment))
 
 
 if __name__ == '__main__':
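
entity_sentiment_text is the one place that decodes and then immediately re-encodes: the diff shows the document content handed over as UTF-8 bytes, and enums.EncodingType.UTF8 tells the API which encoding to use when computing mention begin_offset values. The decode/encode pair simply normalizes both possible inputs to the same payload. A sketch of the round trip (the helper and its name are illustrative):

    import six

    def to_utf8_bytes(text):
        # Accept bytes or unicode; always hand the API UTF-8 bytes.
        if isinstance(text, six.binary_type):
            text = text.decode('utf-8')  # assumes UTF-8 input
        return text.encode('utf-8')

    # Both spellings of 'café' normalize to the same payload.
    assert to_utf8_bytes(u'caf\xe9') == to_utf8_bytes(b'caf\xc3\xa9')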
File 3/3: tests for snippets.py

@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 # Copyright 2017 Google, Inc.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -26,6 +27,15 @@ def test_sentiment_text(capsys):
     assert 'Score: 0' in out
 
 
+def test_sentiment_utf(capsys):
+    snippets.sentiment_text(
+        u'1er site d\'information. Les articles du journal et toute l\'' +
+        u'actualité en continu : International, France, Société, Economie, ' +
+        u'Culture, Environnement')
+    out, _ = capsys.readouterr()
+    assert 'Score: 0' in out
+
+
 def test_sentiment_file(capsys):
     snippets.sentiment_file(TEST_FILE_URL)
     out, _ = capsys.readouterr()
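
The new first line of the test file, # -*- coding: utf-8 -*-, is what allows the French string literal at all: PEP 263 requires an encoding declaration before non-ASCII bytes may appear in Python 2 source. A minimal illustration (file contents hypothetical, not part of the commit):

    # -*- coding: utf-8 -*-
    # Without the declaration above, Python 2 refuses to compile this
    # file ("SyntaxError: Non-ASCII character '\xc3' ... but no encoding
    # declared") before any test can run.
    s = u'actualité en continu'
    assert u'\xe9' in s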
