Catch ValueError when converting font encoding differences to characters (#389)

* Catch ValueError when calling `name2unicode` when a unicode value cannot be parsed
* Add test for catching ValueError and KeyError when font encoding differences are invalid
* Added line to CHANGELOG.md
pdfminer · Mar 16, 2020 · 9d7fe2d · 9d7fe2d
1 parent a087d6d
commit 9d7fe2d
Show file tree

Hide file tree

Showing 3 changed files with 14 additions and 2 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -6,6 +6,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
 ## [Unreleased]
 
 ### Fixed
+
+- Ignore ValueError when converting font encoding differences ([#389](https://github.com/pdfminer/pdfminer.six/pull/389))
 - Grouping of text lines outside of parent container bounding box ([#386](https://github.com/pdfminer/pdfminer.six/pull/386))
 
 ## [20200124] - 2020-01-24

diff --git a/pdfminer/encodingdb.py b/pdfminer/encodingdb.py
@@ -106,7 +106,7 @@ def get_encoding(cls, name, diff=None):
                 elif isinstance(x, PSLiteral):
                     try:
                         cid2unicode[cid] = name2unicode(x.name)
-                    except KeyError as e:
+                    except (KeyError, ValueError) as e:
                         log.debug(str(e))
                     cid += 1
         return cid2unicode
diff --git a/tests/test_encodingdb.py b/tests/test_encodingdb.py
@@ -6,7 +6,8 @@
 """
 from nose.tools import assert_raises
 
-from pdfminer.encodingdb import name2unicode
+from pdfminer.encodingdb import name2unicode, EncodingDB
+from pdfminer.psparser import PSLiteral
 
 
 def test_name2unicode_name_in_agl():
@@ -145,3 +146,12 @@ def test_name2unicode_pua_ogoneksmall():
 
 def test_name2unicode_overflow_error():
     assert_raises(KeyError, name2unicode, '226215240241240240240240')
+
+
+def test_get_encoding_with_invalid_differences():
+    """Invalid differences should be silently ignored
+
+    Regression test for https://github.com/pdfminer/pdfminer.six/issues/385
+    """
+    invalid_differences = [PSLiteral('ubuntu'), PSLiteral('1234')]
+    EncodingDB.get_encoding('StandardEncoding', invalid_differences)