From f12174109e50fb45f123e9951c18d024f9cdafb2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20K=C5=82oczko?= Date: Tue, 9 Apr 2024 12:53:24 +0000 Subject: [PATCH 1/3] really drop python<=3.7 support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Filter all code over `pyupgrade --py38-plus`. Signed-off-by: Tomasz Kłoczko --- pdfminer/converter.py | 6 +++--- pdfminer/fontmetrics.py | 4 ++-- pdfminer/glyphlist.py | 2 +- pdfminer/layout.py | 16 ++++++++-------- pdfminer/pdfdevice.py | 4 ++-- pdfminer/pdfdocument.py | 6 +++--- pdfminer/pdffont.py | 2 +- pdfminer/pdfinterp.py | 2 +- pdfminer/pdftypes.py | 29 ++++++++++++----------------- pdfminer/psparser.py | 4 ++-- pdfminer/utils.py | 8 ++++---- setup.py | 2 +- tests/test_pdfdocument.py | 2 +- tests/test_pdfpage.py | 2 +- tests/test_tools_dumppdf.py | 4 ++-- tests/test_tools_pdf2txt.py | 4 ++-- tests/test_utils.py | 2 +- tools/conv_afm.py | 2 +- tools/dumppdf.py | 6 +++--- tools/pdf2txt.py | 4 ++-- 20 files changed, 53 insertions(+), 58 deletions(-) diff --git a/pdfminer/converter.py b/pdfminer/converter.py index 9cc2ac5f..12d36f80 100644 --- a/pdfminer/converter.py +++ b/pdfminer/converter.py @@ -458,7 +458,7 @@ def write_header(self) -> None: def write_footer(self) -> None: page_links = [ - '{}'.format(i, i) for i in range(1, self.pageno) + f'{i}' for i in range(1, self.pageno) ] s = '
Page: %s
\n' % ", ".join( page_links @@ -783,7 +783,7 @@ def render(item: LTItem) -> None: ) self.write(s) elif isinstance(item, LTFigure): - s = '
\n' % (item.name, bbox2str(item.bbox)) + s = '
\n'.format(item.name, bbox2str(item.bbox)) self.write(s) for child in item: render(child) @@ -974,7 +974,7 @@ def render(item: LTItem) -> None: self.write("\n") elif isinstance(item, LTTextLine): self.write( - "" % ((self.bbox_repr(item.bbox))) + "" % (self.bbox_repr(item.bbox)) ) for child_line in item: render(child_line) diff --git a/pdfminer/fontmetrics.py b/pdfminer/fontmetrics.py index 54b2c5a9..72038a10 100644 --- a/pdfminer/fontmetrics.py +++ b/pdfminer/fontmetrics.py @@ -36,7 +36,7 @@ def convert_font_metrics(path: str) -> None: See below for the output. """ fonts = {} - with open(path, "r") as fileinput: + with open(path) as fileinput: for line in fileinput.readlines(): f = line.strip().split(" ") if not f: @@ -66,7 +66,7 @@ def convert_font_metrics(path: str) -> None: print("# -*- python -*-") print("FONT_METRICS = {") for (fontname, (props, chars)) in fonts.items(): - print(" {!r}: {!r},".format(fontname, (props, chars))) + print(f" {fontname!r}: {(props, chars)!r},") print("}") diff --git a/pdfminer/glyphlist.py b/pdfminer/glyphlist.py index 9d4eb908..9e5135f3 100644 --- a/pdfminer/glyphlist.py +++ b/pdfminer/glyphlist.py @@ -58,7 +58,7 @@ def convert_glyphlist(path: str) -> None: See output below. """ state = 0 - with open(path, "r") as fileinput: + with open(path) as fileinput: for line in fileinput.readlines(): line = line.strip() if not line or line.startswith("#"): diff --git a/pdfminer/layout.py b/pdfminer/layout.py index 408a527a..b4028f76 100644 --- a/pdfminer/layout.py +++ b/pdfminer/layout.py @@ -129,7 +129,7 @@ class LTText: """Interface for things that have text""" def __repr__(self) -> str: - return "<%s %r>" % (self.__class__.__name__, self.get_text()) + return "<{} {!r}>".format(self.__class__.__name__, self.get_text()) def get_text(self) -> str: """Text contained in this object""" @@ -144,7 +144,7 @@ def __init__(self, bbox: Rect) -> None: self.set_bbox(bbox) def __repr__(self) -> str: - return "<%s %s>" % (self.__class__.__name__, bbox2str(self.bbox)) + return "<{} {}>".format(self.__class__.__name__, bbox2str(self.bbox)) # Disable comparison. def __lt__(self, _: object) -> bool: @@ -330,7 +330,7 @@ def __init__(self, name: str, stream: PDFStream, bbox: Rect) -> None: self.colorspace = [self.colorspace] def __repr__(self) -> str: - return "<%s(%s) %s %r>" % ( + return "<{}({}) {} {!r}>".format( self.__class__.__name__, self.name, bbox2str(self.bbox), @@ -410,7 +410,7 @@ def __init__( return def __repr__(self) -> str: - return "<%s %s matrix=%s font=%r adv=%s text=%r>" % ( + return "<{} {} matrix={} font={!r} adv={} text={!r}>".format( self.__class__.__name__, bbox2str(self.bbox), matrix2str(self.matrix), @@ -503,7 +503,7 @@ def __init__(self, word_margin: float) -> None: return def __repr__(self) -> str: - return "<%s %s %r>" % ( + return "<{} {} {!r}>".format( self.__class__.__name__, bbox2str(self.bbox), self.get_text(), @@ -674,7 +674,7 @@ def __init__(self) -> None: return def __repr__(self) -> str: - return "<%s(%s) %s %r>" % ( + return "<{}({}) {} {!r}>".format( self.__class__.__name__, self.index, bbox2str(self.bbox), @@ -1007,7 +1007,7 @@ def __init__(self, name: str, bbox: Rect, matrix: Matrix) -> None: return def __repr__(self) -> str: - return "<%s(%s) %s matrix=%s>" % ( + return "<{}({}) {} matrix={}>".format( self.__class__.__name__, self.name, bbox2str(self.bbox), @@ -1035,7 +1035,7 @@ def __init__(self, pageid: int, bbox: Rect, rotate: float = 0) -> None: return def __repr__(self) -> str: - return "<%s(%r) %s rotate=%r>" % ( + return "<{}({!r}) {} rotate={!r}>".format( self.__class__.__name__, self.pageid, bbox2str(self.bbox), diff --git a/pdfminer/pdfdevice.py b/pdfminer/pdfdevice.py index 075585fa..a3564909 100644 --- a/pdfminer/pdfdevice.py +++ b/pdfminer/pdfdevice.py @@ -292,11 +292,11 @@ def begin_tag(self, tag: PSLiteral, props: Optional["PDFStackT"] = None) -> None if isinstance(props, dict): s = "".join( [ - ' {}="{}"'.format(utils.enc(k), utils.make_compat_str(v)) + f' {utils.enc(k)}="{utils.make_compat_str(v)}"' for (k, v) in sorted(props.items()) ] ) - out_s = "<{}{}>".format(utils.enc(cast(str, tag.name)), s) + out_s = f"<{utils.enc(cast(str, tag.name))}{s}>" self._write(out_s) self._stack.append(tag) return diff --git a/pdfminer/pdfdocument.py b/pdfminer/pdfdocument.py index 258e9473..61f3f18a 100644 --- a/pdfminer/pdfdocument.py +++ b/pdfminer/pdfdocument.py @@ -158,12 +158,12 @@ def load(self, parser: PDFParser) -> None: break f = line.split(b" ") if len(f) != 2: - error_msg = "Trailer not found: {!r}: line={!r}".format(parser, line) + error_msg = f"Trailer not found: {parser!r}: line={line!r}" raise PDFNoValidXRef(error_msg) try: (start, nobjs) = map(int, f) except ValueError: - error_msg = "Invalid line: {!r}: line={!r}".format(parser, line) + error_msg = f"Invalid line: {parser!r}: line={line!r}" raise PDFNoValidXRef(error_msg) for objid in range(start, start + nobjs): try: @@ -833,7 +833,7 @@ def _getobj_parse(self, pos: int, objid: int) -> object: objid1 = x[-2] # #### end hack around malformed pdf files if objid1 != objid: - raise PDFSyntaxError("objid mismatch: {!r}={!r}".format(objid1, objid)) + raise PDFSyntaxError(f"objid mismatch: {objid1!r}={objid!r}") if kwd != KWD(b"obj"): raise PDFSyntaxError("Invalid object spec: offset=%r" % pos) diff --git a/pdfminer/pdffont.py b/pdfminer/pdffont.py index 248c8c6b..2a23a00e 100644 --- a/pdfminer/pdffont.py +++ b/pdfminer/pdffont.py @@ -1063,7 +1063,7 @@ def __init__( cid_ordering = resolve1(self.cidsysteminfo.get("Ordering", b"unknown")).decode( "latin1" ) - self.cidcoding = "{}-{}".format(cid_registry.strip(), cid_ordering.strip()) + self.cidcoding = f"{cid_registry.strip()}-{cid_ordering.strip()}" self.cmap: CMapBase = self.get_cmap_from_spec(spec, strict) try: diff --git a/pdfminer/pdfinterp.py b/pdfminer/pdfinterp.py index 0ac8e5ad..a51fa7d5 100644 --- a/pdfminer/pdfinterp.py +++ b/pdfminer/pdfinterp.py @@ -320,7 +320,7 @@ def do_keyword(self, pos: int, token: PSKeyword) -> None: try: (_, objs) = self.end_type("inline") if len(objs) % 2 != 0: - error_msg = "Invalid dictionary construct: {!r}".format(objs) + error_msg = f"Invalid dictionary construct: {objs!r}" raise PSTypeError(error_msg) d = {literal_name(k): v for (k, v) in choplist(2, objs)} (pos, data) = self.get_inline_data(pos + len(b"ID ")) diff --git a/pdfminer/pdftypes.py b/pdfminer/pdftypes.py index 5f302ba3..16e7e970 100644 --- a/pdfminer/pdftypes.py +++ b/pdfminer/pdftypes.py @@ -44,25 +44,20 @@ LITERALS_JPX_DECODE = (LIT("JPXDecode"),) -if sys.version_info >= (3, 8): - from typing import Protocol +from typing import Protocol - class DecipherCallable(Protocol): - """Fully typed a decipher callback, with optional parameter.""" +class DecipherCallable(Protocol): + """Fully typed a decipher callback, with optional parameter.""" - def __call__( - self, - objid: int, - genno: int, - data: bytes, - attrs: Optional[Dict[str, Any]] = None, - ) -> bytes: - raise NotImplementedError - -else: # Fallback for older Python - from typing import Callable + def __call__( + self, + objid: int, + genno: int, + data: bytes, + attrs: Optional[Dict[str, Any]] = None, + ) -> bytes: + raise NotImplementedError - DecipherCallable = Callable[..., bytes] class PDFObject(PSObject): @@ -333,7 +328,7 @@ def decode(self) -> None: except zlib.error as e: if settings.STRICT: - error_msg = "Invalid zlib bytes: {!r}, {!r}".format(e, data) + error_msg = f"Invalid zlib bytes: {e!r}, {data!r}" raise PDFException(error_msg) try: diff --git a/pdfminer/psparser.py b/pdfminer/psparser.py index 83d15140..a8f5a57e 100755 --- a/pdfminer/psparser.py +++ b/pdfminer/psparser.py @@ -132,7 +132,7 @@ def intern(self, name: PSLiteral.NameType) -> _SymbolT: def literal_name(x: object) -> Any: if not isinstance(x, PSLiteral): if settings.STRICT: - raise PSTypeError("Literal required: {!r}".format(x)) + raise PSTypeError(f"Literal required: {x!r}") else: name = x else: @@ -592,7 +592,7 @@ def start_type(self, pos: int, type: str) -> None: def end_type(self, type: str) -> Tuple[int, List[PSStackType[ExtraT]]]: if self.curtype != type: - raise PSTypeError("Type mismatch: {!r} != {!r}".format(self.curtype, type)) + raise PSTypeError(f"Type mismatch: {self.curtype!r} != {type!r}") objs = [obj for (_, obj) in self.curstack] (pos, self.curtype, self.curstack) = self.context.pop() log.debug("end_type: pos=%r, type=%r, objs=%r", pos, type, objs) diff --git a/pdfminer/utils.py b/pdfminer/utils.py index 59cf5cd3..16aad923 100644 --- a/pdfminer/utils.py +++ b/pdfminer/utils.py @@ -39,7 +39,7 @@ AnyIO = Union[TextIO, BinaryIO] -class open_filename(object): +class open_filename: """ Context manager that allows opening a filename (str or pathlib.PurePath type is supported) and closes it on exit, @@ -89,7 +89,7 @@ def shorten_str(s: str, size: int) -> str: return s[:size] if len(s) > size: length = (size - 5) // 2 - return "{} ... {}".format(s[:length], s[-length:]) + return f"{s[:length]} ... {s[-length:]}" else: return s @@ -643,12 +643,12 @@ def enc(x: str) -> str: def bbox2str(bbox: Rect) -> str: (x0, y0, x1, y1) = bbox - return "{:.3f},{:.3f},{:.3f},{:.3f}".format(x0, y0, x1, y1) + return f"{x0:.3f},{y0:.3f},{x1:.3f},{y1:.3f}" def matrix2str(m: Matrix) -> str: (a, b, c, d, e, f) = m - return "[{:.2f},{:.2f},{:.2f},{:.2f}, ({:.2f},{:.2f})]".format(a, b, c, d, e, f) + return f"[{a:.2f},{b:.2f},{c:.2f},{d:.2f}, ({e:.2f},{f:.2f})]" def vecBetweenBoxes(obj1: "LTComponent", obj2: "LTComponent") -> Point: diff --git a/setup.py b/setup.py index 84e5ceea..0e60b38e 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from setuptools import setup root_dir = Path(__file__).parent -with open(root_dir / "README.md", "rt") as f: +with open(root_dir / "README.md") as f: readme = f.read() setup( diff --git a/tests/test_pdfdocument.py b/tests/test_pdfdocument.py index c57126fb..343e1dcb 100644 --- a/tests/test_pdfdocument.py +++ b/tests/test_pdfdocument.py @@ -8,7 +8,7 @@ from tests.helpers import absolute_sample_path -class TestPdfDocument(object): +class TestPdfDocument: def test_get_zero_objid_raises_pdfobjectnotfound(self): with open(absolute_sample_path("simple1.pdf"), "rb") as in_file: parser = PDFParser(in_file) diff --git a/tests/test_pdfpage.py b/tests/test_pdfpage.py index c3fe86c2..a99a2f47 100644 --- a/tests/test_pdfpage.py +++ b/tests/test_pdfpage.py @@ -4,7 +4,7 @@ from tests.helpers import absolute_sample_path -class TestPdfPage(object): +class TestPdfPage: def test_page_labels(self): path = absolute_sample_path("contrib/pagelabels.pdf") expected_labels = ["iii", "iv", "1", "2", "1"] diff --git a/tests/test_tools_dumppdf.py b/tests/test_tools_dumppdf.py index 971c3d07..735556e2 100644 --- a/tests/test_tools_dumppdf.py +++ b/tests/test_tools_dumppdf.py @@ -11,9 +11,9 @@ def run(filename, options=None): absolute_path = absolute_sample_path(filename) with TemporaryFilePath() as output_file_name: if options: - s = "dumppdf -o %s %s %s" % (output_file_name, options, absolute_path) + s = "dumppdf -o {} {} {}".format(output_file_name, options, absolute_path) else: - s = "dumppdf -o %s %s" % (output_file_name, absolute_path) + s = "dumppdf -o {} {}".format(output_file_name, absolute_path) dumppdf.main(s.split(" ")[1:]) diff --git a/tests/test_tools_pdf2txt.py b/tests/test_tools_pdf2txt.py index ccbe55f9..a6e0ee1a 100644 --- a/tests/test_tools_pdf2txt.py +++ b/tests/test_tools_pdf2txt.py @@ -12,9 +12,9 @@ def run(sample_path, options=None): absolute_path = absolute_sample_path(sample_path) with TemporaryFilePath() as output_file_name: if options: - s = "pdf2txt -o{} {} {}".format(output_file_name, options, absolute_path) + s = f"pdf2txt -o{output_file_name} {options} {absolute_path}" else: - s = "pdf2txt -o{} {}".format(output_file_name, absolute_path) + s = f"pdf2txt -o{output_file_name} {absolute_path}" pdf2txt.main(s.split(" ")[1:]) diff --git a/tests/test_utils.py b/tests/test_utils.py index 160b02b4..af37ce9a 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -71,7 +71,7 @@ def given_plane_with_one_object(object_size=50, gridsize=50): return plane, obj -class TestFunctions(object): +class TestFunctions: def test_shorten_str(self): s = shorten_str("Hello there World", 15) assert s == "Hello ... World" diff --git a/tools/conv_afm.py b/tools/conv_afm.py index f666ee18..2d663cc1 100755 --- a/tools/conv_afm.py +++ b/tools/conv_afm.py @@ -36,7 +36,7 @@ def main(argv): print("# -*- python -*-") print("FONT_METRICS = {") for (fontname, (props, chars)) in fonts.items(): - print(" {!r}: {!r},".format(fontname, (props, chars))) + print(f" {fontname!r}: {(props, chars)!r},") print("}") return 0 diff --git a/tools/dumppdf.py b/tools/dumppdf.py index cc8c4558..2b9b079d 100755 --- a/tools/dumppdf.py +++ b/tools/dumppdf.py @@ -186,7 +186,7 @@ def resolve_dest(dest: object) -> Any: dest = resolve_dest(action["D"]) pageno = pages[dest[0].objid] s = escape(title) - outfp.write('\n'.format(level, s)) + outfp.write(f'\n') if dest is not None: outfp.write("") dumpxml(outfp, dest) @@ -224,7 +224,7 @@ def extract1(objid: int, obj: Dict[str, Any]) -> None: ) path = os.path.join(extractdir, "%.6d-%s" % (objid, filename)) if os.path.exists(path): - raise IOError("file exists: %r" % path) + raise OSError("file exists: %r" % path) print("extracting: %r" % path) os.makedirs(os.path.dirname(path), exist_ok=True) out = open(path, "wb") @@ -300,7 +300,7 @@ def create_parser() -> ArgumentParser: "--version", "-v", action="version", - version="pdfminer.six v{}".format(pdfminer.__version__), + version=f"pdfminer.six v{pdfminer.__version__}", ) parser.add_argument( "--debug", diff --git a/tools/pdf2txt.py b/tools/pdf2txt.py index 0511b937..8bd959ac 100755 --- a/tools/pdf2txt.py +++ b/tools/pdf2txt.py @@ -21,7 +21,7 @@ def float_or_disabled(x: str) -> Optional[float]: try: return float(x) except ValueError: - raise argparse.ArgumentTypeError("invalid float value: {}".format(x)) + raise argparse.ArgumentTypeError(f"invalid float value: {x}") def extract_text( @@ -77,7 +77,7 @@ def create_parser() -> argparse.ArgumentParser: "--version", "-v", action="version", - version="pdfminer.six v{}".format(pdfminer.__version__), + version=f"pdfminer.six v{pdfminer.__version__}", ) parser.add_argument( "--debug", From f90f36d1aff49acf5e494805483265f56bdd02a8 Mon Sep 17 00:00:00 2001 From: Pieter Marsman Date: Sat, 6 Jul 2024 15:35:08 +0200 Subject: [PATCH 2/3] Nox --- pdfminer/converter.py | 8 ++++---- pdfminer/pdftypes.py | 5 +---- tools/pdf2txt.py | 2 +- 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/pdfminer/converter.py b/pdfminer/converter.py index 12d36f80..c567c3c0 100644 --- a/pdfminer/converter.py +++ b/pdfminer/converter.py @@ -457,9 +457,7 @@ def write_header(self) -> None: return def write_footer(self) -> None: - page_links = [ - f'{i}' for i in range(1, self.pageno) - ] + page_links = [f'{i}' for i in range(1, self.pageno)] s = '
Page: %s
\n' % ", ".join( page_links ) @@ -783,7 +781,9 @@ def render(item: LTItem) -> None: ) self.write(s) elif isinstance(item, LTFigure): - s = '
\n'.format(item.name, bbox2str(item.bbox)) + s = '
\n'.format( + item.name, bbox2str(item.bbox) + ) self.write(s) for child in item: render(child) diff --git a/pdfminer/pdftypes.py b/pdfminer/pdftypes.py index 16e7e970..1a31a8d6 100644 --- a/pdfminer/pdftypes.py +++ b/pdfminer/pdftypes.py @@ -1,7 +1,7 @@ import io import logging -import sys import zlib +from typing import Protocol from typing import ( TYPE_CHECKING, Any, @@ -44,8 +44,6 @@ LITERALS_JPX_DECODE = (LIT("JPXDecode"),) -from typing import Protocol - class DecipherCallable(Protocol): """Fully typed a decipher callback, with optional parameter.""" @@ -59,7 +57,6 @@ def __call__( raise NotImplementedError - class PDFObject(PSObject): pass diff --git a/tools/pdf2txt.py b/tools/pdf2txt.py index 8bd959ac..4f51af94 100755 --- a/tools/pdf2txt.py +++ b/tools/pdf2txt.py @@ -40,7 +40,7 @@ def extract_text( output_dir: Optional[str] = None, debug: bool = False, disable_caching: bool = False, - **kwargs: Any + **kwargs: Any, ) -> AnyIO: if not files: raise ValueError("Must provide files to work upon!") From 97fcce3e9605d077cae451234324d196f0e8139a Mon Sep 17 00:00:00 2001 From: Pieter Marsman Date: Sat, 6 Jul 2024 15:38:55 +0200 Subject: [PATCH 3/3] Added line to CHANGELOG.md --- CHANGELOG.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 71bb1615..ad4282d5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,7 +8,6 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). ### Added - Support for zipped jpeg's ([#938](https://github.com/pdfminer/pdfminer.six/pull/938)) - - Fuzzing harnesses for integration into Google's OSS-Fuzz ([949](https://github.com/pdfminer/pdfminer.six/pull/949)) - Support for setuptools-git-versioning version 2.0.0 ([#957](https://github.com/pdfminer/pdfminer.six/pull/957)) @@ -21,6 +20,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). - Reading cmap's with whitespace in the name ([#935](https://github.com/pdfminer/pdfminer.six/pull/935)) - Optimize `apply_png_predictor` by using lists ([#912](https://github.com/pdfminer/pdfminer.six/pull/912)) +### Changed + +- Updated Python 3.7 syntax to 3.8 ([#956](https://github.com/pdfminer/pdfminer.six/pull/956)) + ## [20231228] ### Removed