pdfminer · pietermarsman · Jul 6, 2024 · Apr 9, 2024 · Jun 27, 2024 · Jul 6, 2024
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -8,7 +8,6 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
 ### Added
 
 - Support for zipped jpeg's ([#938](https://github.com/pdfminer/pdfminer.six/pull/938))
-
 - Fuzzing harnesses for integration into Google's OSS-Fuzz ([949](https://github.com/pdfminer/pdfminer.six/pull/949))
 - Support for setuptools-git-versioning version 2.0.0 ([#957](https://github.com/pdfminer/pdfminer.six/pull/957))
 
@@ -21,6 +20,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
 - Reading cmap's with whitespace in the name ([#935](https://github.com/pdfminer/pdfminer.six/pull/935))
 - Optimize `apply_png_predictor` by using lists ([#912](https://github.com/pdfminer/pdfminer.six/pull/912))
 
+### Changed
+
+- Updated Python 3.7 syntax to 3.8 ([#956](https://github.com/pdfminer/pdfminer.six/pull/956))
+
 ## [20231228]
 
 ### Removed

diff --git a/pdfminer/converter.py b/pdfminer/converter.py
@@ -458,9 +458,7 @@ def write_header(self) -> None:
         return
 
     def write_footer(self) -> None:
-        page_links = [
-            '<a href="#{}">{}</a>'.format(i, i) for i in range(1, self.pageno)
-        ]
+        page_links = [f'<a href="#{i}">{i}</a>' for i in range(1, self.pageno)]
         s = '<div style="position:absolute; top:0px;">Page: %s</div>\n' % ", ".join(
             page_links
         )
@@ -784,7 +782,9 @@ def render(item: LTItem) -> None:
                 )
                 self.write(s)
             elif isinstance(item, LTFigure):
-                s = '<figure name="%s" bbox="%s">\n' % (item.name, bbox2str(item.bbox))
+                s = '<figure name="{}" bbox="{}">\n'.format(
+                    item.name, bbox2str(item.bbox)
+                )
                 self.write(s)
                 for child in item:
                     render(child)
@@ -975,7 +975,7 @@ def render(item: LTItem) -> None:
                 self.write("</div>\n")
             elif isinstance(item, LTTextLine):
                 self.write(
-                    "<span class='ocr_line' title='%s'>" % ((self.bbox_repr(item.bbox)))
+                    "<span class='ocr_line' title='%s'>" % (self.bbox_repr(item.bbox))
                 )
                 for child_line in item:
                     render(child_line)

diff --git a/pdfminer/fontmetrics.py b/pdfminer/fontmetrics.py
@@ -36,7 +36,7 @@ def convert_font_metrics(path: str) -> None:
     See below for the output.
     """
     fonts = {}
-    with open(path, "r") as fileinput:
+    with open(path) as fileinput:
         for line in fileinput.readlines():
             f = line.strip().split(" ")
             if not f:
@@ -66,7 +66,7 @@ def convert_font_metrics(path: str) -> None:
         print("# -*- python -*-")
         print("FONT_METRICS = {")
         for (fontname, (props, chars)) in fonts.items():
-            print(" {!r}: {!r},".format(fontname, (props, chars)))
+            print(f" {fontname!r}: {(props, chars)!r},")
         print("}")
 
 

diff --git a/pdfminer/glyphlist.py b/pdfminer/glyphlist.py
@@ -58,7 +58,7 @@ def convert_glyphlist(path: str) -> None:
     See output below.
     """
     state = 0
-    with open(path, "r") as fileinput:
+    with open(path) as fileinput:
         for line in fileinput.readlines():
             line = line.strip()
             if not line or line.startswith("#"):

diff --git a/pdfminer/layout.py b/pdfminer/layout.py
@@ -130,7 +130,7 @@ class LTText:
     """Interface for things that have text"""
 
     def __repr__(self) -> str:
-        return "<%s %r>" % (self.__class__.__name__, self.get_text())
+        return "<{} {!r}>".format(self.__class__.__name__, self.get_text())
 
     def get_text(self) -> str:
         """Text contained in this object"""
@@ -145,7 +145,7 @@ def __init__(self, bbox: Rect) -> None:
         self.set_bbox(bbox)
 
     def __repr__(self) -> str:
-        return "<%s %s>" % (self.__class__.__name__, bbox2str(self.bbox))
+        return "<{} {}>".format(self.__class__.__name__, bbox2str(self.bbox))
 
     # Disable comparison.
     def __lt__(self, _: object) -> bool:
@@ -331,7 +331,7 @@ def __init__(self, name: str, stream: PDFStream, bbox: Rect) -> None:
             self.colorspace = [self.colorspace]
 
     def __repr__(self) -> str:
-        return "<%s(%s) %s %r>" % (
+        return "<{}({}) {} {!r}>".format(
             self.__class__.__name__,
             self.name,
             bbox2str(self.bbox),
@@ -411,7 +411,7 @@ def __init__(
         return
 
     def __repr__(self) -> str:
-        return "<%s %s matrix=%s font=%r adv=%s text=%r>" % (
+        return "<{} {} matrix={} font={!r} adv={} text={!r}>".format(
             self.__class__.__name__,
             bbox2str(self.bbox),
             matrix2str(self.matrix),
@@ -504,7 +504,7 @@ def __init__(self, word_margin: float) -> None:
         return
 
     def __repr__(self) -> str:
-        return "<%s %s %r>" % (
+        return "<{} {} {!r}>".format(
             self.__class__.__name__,
             bbox2str(self.bbox),
             self.get_text(),
@@ -675,7 +675,7 @@ def __init__(self) -> None:
         return
 
     def __repr__(self) -> str:
-        return "<%s(%s) %s %r>" % (
+        return "<{}({}) {} {!r}>".format(
             self.__class__.__name__,
             self.index,
             bbox2str(self.bbox),
@@ -1008,7 +1008,7 @@ def __init__(self, name: str, bbox: Rect, matrix: Matrix) -> None:
         return
 
     def __repr__(self) -> str:
-        return "<%s(%s) %s matrix=%s>" % (
+        return "<{}({}) {} matrix={}>".format(
             self.__class__.__name__,
             self.name,
             bbox2str(self.bbox),
@@ -1036,7 +1036,7 @@ def __init__(self, pageid: int, bbox: Rect, rotate: float = 0) -> None:
         return
 
     def __repr__(self) -> str:
-        return "<%s(%r) %s rotate=%r>" % (
+        return "<{}({!r}) {} rotate={!r}>".format(
             self.__class__.__name__,
             self.pageid,
             bbox2str(self.bbox),

diff --git a/pdfminer/pdfdevice.py b/pdfminer/pdfdevice.py
@@ -292,11 +292,11 @@ def begin_tag(self, tag: PSLiteral, props: Optional["PDFStackT"] = None) -> None
         if isinstance(props, dict):
             s = "".join(
                 [
-                    ' {}="{}"'.format(utils.enc(k), utils.make_compat_str(v))
+                    f' {utils.enc(k)}="{utils.make_compat_str(v)}"'
                     for (k, v) in sorted(props.items())
                 ]
             )
-        out_s = "<{}{}>".format(utils.enc(cast(str, tag.name)), s)
+        out_s = f"<{utils.enc(cast(str, tag.name))}{s}>"
         self._write(out_s)
         self._stack.append(tag)
         return

diff --git a/pdfminer/pdfdocument.py b/pdfminer/pdfdocument.py
@@ -157,12 +157,12 @@ def load(self, parser: PDFParser) -> None:
                 break
             f = line.split(b" ")
             if len(f) != 2:
-                error_msg = "Trailer not found: {!r}: line={!r}".format(parser, line)
+                error_msg = f"Trailer not found: {parser!r}: line={line!r}"
                 raise PDFNoValidXRef(error_msg)
             try:
                 (start, nobjs) = map(int, f)
             except ValueError:
-                error_msg = "Invalid line: {!r}: line={!r}".format(parser, line)
+                error_msg = f"Invalid line: {parser!r}: line={line!r}"
                 raise PDFNoValidXRef(error_msg)
             for objid in range(start, start + nobjs):
                 try:
@@ -829,7 +829,7 @@ def _getobj_parse(self, pos: int, objid: int) -> object:
                 objid1 = x[-2]
         # #### end hack around malformed pdf files
         if objid1 != objid:
-            raise PDFSyntaxError("objid mismatch: {!r}={!r}".format(objid1, objid))
+            raise PDFSyntaxError(f"objid mismatch: {objid1!r}={objid!r}")
 
         if kwd != KWD(b"obj"):
             raise PDFSyntaxError("Invalid object spec: offset=%r" % pos)

diff --git a/pdfminer/pdffont.py b/pdfminer/pdffont.py
@@ -1063,7 +1063,7 @@ def __init__(
         cid_ordering = resolve1(self.cidsysteminfo.get("Ordering", b"unknown")).decode(
             "latin1"
         )
-        self.cidcoding = "{}-{}".format(cid_registry.strip(), cid_ordering.strip())
+        self.cidcoding = f"{cid_registry.strip()}-{cid_ordering.strip()}"
         self.cmap: CMapBase = self.get_cmap_from_spec(spec, strict)
 
         try:

diff --git a/pdfminer/pdfinterp.py b/pdfminer/pdfinterp.py
@@ -320,7 +320,7 @@ def do_keyword(self, pos: int, token: PSKeyword) -> None:
             try:
                 (_, objs) = self.end_type("inline")
                 if len(objs) % 2 != 0:
-                    error_msg = "Invalid dictionary construct: {!r}".format(objs)
+                    error_msg = f"Invalid dictionary construct: {objs!r}"
                     raise PSTypeError(error_msg)
                 d = {literal_name(k): v for (k, v) in choplist(2, objs)}
                 (pos, data) = self.get_inline_data(pos + len(b"ID "))

diff --git a/pdfminer/pdftypes.py b/pdfminer/pdftypes.py
@@ -1,7 +1,7 @@
 import io
 import logging
-import sys
 import zlib
+from typing import Protocol
 from typing import (
     TYPE_CHECKING,
     Any,
@@ -42,25 +42,17 @@
 LITERALS_JPX_DECODE = (LIT("JPXDecode"),)
 
 
-if sys.version_info >= (3, 8):
-    from typing import Protocol
+class DecipherCallable(Protocol):
+    """Fully typed decipher callback, with an optional `attrs` parameter."""
 
-    class DecipherCallable(Protocol):
-        """Fully typed a decipher callback, with optional parameter."""
-
-        def __call__(
-            self,
-            objid: int,
-            genno: int,
-            data: bytes,
-            attrs: Optional[Dict[str, Any]] = None,
-        ) -> bytes:
-            raise NotImplementedError
-
-else:  # Fallback for older Python
-    from typing import Callable
-
-    DecipherCallable = Callable[..., bytes]
+    def __call__(
+        self,
+        objid: int,
+        genno: int,
+        data: bytes,
+        attrs: Optional[Dict[str, Any]] = None,
+    ) -> bytes:
+        raise NotImplementedError
 
 
 class PDFObject(PSObject):
@@ -319,7 +311,7 @@ def decode(self) -> None:
 
                 except zlib.error as e:
                     if settings.STRICT:
-                        error_msg = "Invalid zlib bytes: {!r}, {!r}".format(e, data)
+                        error_msg = f"Invalid zlib bytes: {e!r}, {data!r}"
                         raise PDFException(error_msg)
 
                     try:

diff --git a/pdfminer/psparser.py b/pdfminer/psparser.py
@@ -120,7 +120,7 @@ def intern(self, name: PSLiteral.NameType) -> _SymbolT:
 def literal_name(x: object) -> Any:
     if not isinstance(x, PSLiteral):
         if settings.STRICT:
-            raise PSTypeError("Literal required: {!r}".format(x))
+            raise PSTypeError(f"Literal required: {x!r}")
         else:
             name = x
     else:
@@ -580,7 +580,7 @@ def start_type(self, pos: int, type: str) -> None:
 
     def end_type(self, type: str) -> Tuple[int, List[PSStackType[ExtraT]]]:
         if self.curtype != type:
-            raise PSTypeError("Type mismatch: {!r} != {!r}".format(self.curtype, type))
+            raise PSTypeError(f"Type mismatch: {self.curtype!r} != {type!r}")
         objs = [obj for (_, obj) in self.curstack]
         (pos, self.curtype, self.curstack) = self.context.pop()
         log.debug("end_type: pos=%r, type=%r, objs=%r", pos, type, objs)

diff --git a/pdfminer/utils.py b/pdfminer/utils.py
@@ -41,7 +41,7 @@
 AnyIO = Union[TextIO, BinaryIO]
 
 
-class open_filename(object):
+class open_filename:
     """
     Context manager that allows opening a filename
     (str or pathlib.PurePath type is supported) and closes it on exit,
@@ -91,7 +91,7 @@ def shorten_str(s: str, size: int) -> str:
         return s[:size]
     if len(s) > size:
         length = (size - 5) // 2
-        return "{} ... {}".format(s[:length], s[-length:])
+        return f"{s[:length]} ... {s[-length:]}"
     else:
         return s
 
@@ -645,12 +645,12 @@ def enc(x: str) -> str:
 
 def bbox2str(bbox: Rect) -> str:
     (x0, y0, x1, y1) = bbox
-    return "{:.3f},{:.3f},{:.3f},{:.3f}".format(x0, y0, x1, y1)
+    return f"{x0:.3f},{y0:.3f},{x1:.3f},{y1:.3f}"
 
 
 def matrix2str(m: Matrix) -> str:
     (a, b, c, d, e, f) = m
-    return "[{:.2f},{:.2f},{:.2f},{:.2f}, ({:.2f},{:.2f})]".format(a, b, c, d, e, f)
+    return f"[{a:.2f},{b:.2f},{c:.2f},{d:.2f}, ({e:.2f},{f:.2f})]"
 
 
 def vecBetweenBoxes(obj1: "LTComponent", obj2: "LTComponent") -> Point:

diff --git a/setup.py b/setup.py
@@ -4,7 +4,7 @@
 from setuptools import setup
 
 root_dir = Path(__file__).parent
-with open(root_dir / "README.md", "rt") as f:
+with open(root_dir / "README.md") as f:
     readme = f.read()
 
 extras_require = {

diff --git a/tests/test_pdfdocument.py b/tests/test_pdfdocument.py
@@ -9,7 +9,7 @@
 from tests.helpers import absolute_sample_path
 
 
-class TestPdfDocument(object):
+class TestPdfDocument:
     def test_get_zero_objid_raises_pdfobjectnotfound(self):
         with open(absolute_sample_path("simple1.pdf"), "rb") as in_file:
             parser = PDFParser(in_file)

diff --git a/tests/test_pdfpage.py b/tests/test_pdfpage.py
@@ -4,7 +4,7 @@
 from tests.helpers import absolute_sample_path
 
 
-class TestPdfPage(object):
+class TestPdfPage:
     def test_page_labels(self):
         path = absolute_sample_path("contrib/pagelabels.pdf")
         expected_labels = ["iii", "iv", "1", "2", "1"]

diff --git a/tests/test_tools_dumppdf.py b/tests/test_tools_dumppdf.py
@@ -11,9 +11,9 @@ def run(filename, options=None):
     absolute_path = absolute_sample_path(filename)
     with TemporaryFilePath() as output_file_name:
         if options:
-            s = "dumppdf -o %s %s %s" % (output_file_name, options, absolute_path)
+            s = "dumppdf -o {} {} {}".format(output_file_name, options, absolute_path)
         else:
-            s = "dumppdf -o %s %s" % (output_file_name, absolute_path)
+            s = "dumppdf -o {} {}".format(output_file_name, absolute_path)
 
         dumppdf.main(s.split(" ")[1:])
 

diff --git a/tests/test_tools_pdf2txt.py b/tests/test_tools_pdf2txt.py
@@ -12,9 +12,9 @@ def run(sample_path, options=None):
     absolute_path = absolute_sample_path(sample_path)
     with TemporaryFilePath() as output_file_name:
         if options:
-            s = "pdf2txt -o{} {} {}".format(output_file_name, options, absolute_path)
+            s = f"pdf2txt -o{output_file_name} {options} {absolute_path}"
         else:
-            s = "pdf2txt -o{} {}".format(output_file_name, absolute_path)
+            s = f"pdf2txt -o{output_file_name} {absolute_path}"
 
         pdf2txt.main(s.split(" ")[1:])
 

diff --git a/tests/test_utils.py b/tests/test_utils.py
@@ -71,7 +71,7 @@ def given_plane_with_one_object(object_size=50, gridsize=50):
         return plane, obj
 
 
-class TestFunctions(object):
+class TestFunctions:
     def test_shorten_str(self):
         s = shorten_str("Hello there World", 15)
         assert s == "Hello ... World"

diff --git a/tools/conv_afm.py b/tools/conv_afm.py
@@ -36,7 +36,7 @@ def main(argv):
     print("# -*- python -*-")
     print("FONT_METRICS = {")
     for (fontname, (props, chars)) in fonts.items():
-        print(" {!r}: {!r},".format(fontname, (props, chars)))
+        print(f" {fontname!r}: {(props, chars)!r},")
     print("}")
     return 0
 

diff --git a/tools/dumppdf.py b/tools/dumppdf.py
@@ -196,7 +196,7 @@ def resolve_dest(dest: object) -> Any:
                         dest = resolve_dest(action["D"])
                         pageno = pages[dest[0].objid]
             s = escape(title)
-            outfp.write('<outline level="{!r}" title="{}">\n'.format(level, s))
+            outfp.write(f'<outline level="{level!r}" title="{s}">\n')
             if dest is not None:
                 outfp.write("<dest>")
                 dumpxml(outfp, dest)
@@ -310,7 +310,7 @@ def create_parser() -> ArgumentParser:
         "--version",
         "-v",
         action="version",
-        version="pdfminer.six v{}".format(pdfminer.__version__),
+        version=f"pdfminer.six v{pdfminer.__version__}",
     )
     parser.add_argument(
         "--debug",