diff --git a/CHANGELOG.md b/CHANGELOG.md index 71bb1615..ad4282d5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,7 +8,6 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). ### Added - Support for zipped jpeg's ([#938](https://github.com/pdfminer/pdfminer.six/pull/938)) - - Fuzzing harnesses for integration into Google's OSS-Fuzz ([949](https://github.com/pdfminer/pdfminer.six/pull/949)) - Support for setuptools-git-versioning version 2.0.0 ([#957](https://github.com/pdfminer/pdfminer.six/pull/957)) @@ -21,6 +20,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). - Reading cmap's with whitespace in the name ([#935](https://github.com/pdfminer/pdfminer.six/pull/935)) - Optimize `apply_png_predictor` by using lists ([#912](https://github.com/pdfminer/pdfminer.six/pull/912)) +### Changed + +- Updated Python 3.7 syntax to 3.8 ([#956](https://github.com/pdfminer/pdfminer.six/pull/956)) + ## [20231228] ### Removed diff --git a/pdfminer/converter.py b/pdfminer/converter.py index 3a390d49..9b90a769 100644 --- a/pdfminer/converter.py +++ b/pdfminer/converter.py @@ -458,9 +458,7 @@ def write_header(self) -> None: return def write_footer(self) -> None: - page_links = [ - '{}'.format(i, i) for i in range(1, self.pageno) - ] + page_links = [f'{i}' for i in range(1, self.pageno)] s = '
Page: %s
\n' % ", ".join( page_links ) @@ -784,7 +782,9 @@ def render(item: LTItem) -> None: ) self.write(s) elif isinstance(item, LTFigure): - s = '
\n' % (item.name, bbox2str(item.bbox)) + s = '
\n'.format( + item.name, bbox2str(item.bbox) + ) self.write(s) for child in item: render(child) @@ -975,7 +975,7 @@ def render(item: LTItem) -> None: self.write("\n") elif isinstance(item, LTTextLine): self.write( - "" % ((self.bbox_repr(item.bbox))) + "" % (self.bbox_repr(item.bbox)) ) for child_line in item: render(child_line) diff --git a/pdfminer/fontmetrics.py b/pdfminer/fontmetrics.py index 54b2c5a9..72038a10 100644 --- a/pdfminer/fontmetrics.py +++ b/pdfminer/fontmetrics.py @@ -36,7 +36,7 @@ def convert_font_metrics(path: str) -> None: See below for the output. """ fonts = {} - with open(path, "r") as fileinput: + with open(path) as fileinput: for line in fileinput.readlines(): f = line.strip().split(" ") if not f: @@ -66,7 +66,7 @@ def convert_font_metrics(path: str) -> None: print("# -*- python -*-") print("FONT_METRICS = {") for (fontname, (props, chars)) in fonts.items(): - print(" {!r}: {!r},".format(fontname, (props, chars))) + print(f" {fontname!r}: {(props, chars)!r},") print("}") diff --git a/pdfminer/glyphlist.py b/pdfminer/glyphlist.py index 9d4eb908..9e5135f3 100644 --- a/pdfminer/glyphlist.py +++ b/pdfminer/glyphlist.py @@ -58,7 +58,7 @@ def convert_glyphlist(path: str) -> None: See output below. """ state = 0 - with open(path, "r") as fileinput: + with open(path) as fileinput: for line in fileinput.readlines(): line = line.strip() if not line or line.startswith("#"): diff --git a/pdfminer/layout.py b/pdfminer/layout.py index ebaac1e2..e706f6e1 100644 --- a/pdfminer/layout.py +++ b/pdfminer/layout.py @@ -130,7 +130,7 @@ class LTText: """Interface for things that have text""" def __repr__(self) -> str: - return "<%s %r>" % (self.__class__.__name__, self.get_text()) + return "<{} {!r}>".format(self.__class__.__name__, self.get_text()) def get_text(self) -> str: """Text contained in this object""" @@ -145,7 +145,7 @@ def __init__(self, bbox: Rect) -> None: self.set_bbox(bbox) def __repr__(self) -> str: - return "<%s %s>" % (self.__class__.__name__, bbox2str(self.bbox)) + return "<{} {}>".format(self.__class__.__name__, bbox2str(self.bbox)) # Disable comparison. def __lt__(self, _: object) -> bool: @@ -331,7 +331,7 @@ def __init__(self, name: str, stream: PDFStream, bbox: Rect) -> None: self.colorspace = [self.colorspace] def __repr__(self) -> str: - return "<%s(%s) %s %r>" % ( + return "<{}({}) {} {!r}>".format( self.__class__.__name__, self.name, bbox2str(self.bbox), @@ -411,7 +411,7 @@ def __init__( return def __repr__(self) -> str: - return "<%s %s matrix=%s font=%r adv=%s text=%r>" % ( + return "<{} {} matrix={} font={!r} adv={} text={!r}>".format( self.__class__.__name__, bbox2str(self.bbox), matrix2str(self.matrix), @@ -504,7 +504,7 @@ def __init__(self, word_margin: float) -> None: return def __repr__(self) -> str: - return "<%s %s %r>" % ( + return "<{} {} {!r}>".format( self.__class__.__name__, bbox2str(self.bbox), self.get_text(), @@ -675,7 +675,7 @@ def __init__(self) -> None: return def __repr__(self) -> str: - return "<%s(%s) %s %r>" % ( + return "<{}({}) {} {!r}>".format( self.__class__.__name__, self.index, bbox2str(self.bbox), @@ -1008,7 +1008,7 @@ def __init__(self, name: str, bbox: Rect, matrix: Matrix) -> None: return def __repr__(self) -> str: - return "<%s(%s) %s matrix=%s>" % ( + return "<{}({}) {} matrix={}>".format( self.__class__.__name__, self.name, bbox2str(self.bbox), @@ -1036,7 +1036,7 @@ def __init__(self, pageid: int, bbox: Rect, rotate: float = 0) -> None: return def __repr__(self) -> str: - return "<%s(%r) %s rotate=%r>" % ( + return "<{}({!r}) {} rotate={!r}>".format( self.__class__.__name__, self.pageid, bbox2str(self.bbox), diff --git a/pdfminer/pdfdevice.py b/pdfminer/pdfdevice.py index 075585fa..a3564909 100644 --- a/pdfminer/pdfdevice.py +++ b/pdfminer/pdfdevice.py @@ -292,11 +292,11 @@ def begin_tag(self, tag: PSLiteral, props: Optional["PDFStackT"] = None) -> None if isinstance(props, dict): s = "".join( [ - ' {}="{}"'.format(utils.enc(k), utils.make_compat_str(v)) + f' {utils.enc(k)}="{utils.make_compat_str(v)}"' for (k, v) in sorted(props.items()) ] ) - out_s = "<{}{}>".format(utils.enc(cast(str, tag.name)), s) + out_s = f"<{utils.enc(cast(str, tag.name))}{s}>" self._write(out_s) self._stack.append(tag) return diff --git a/pdfminer/pdfdocument.py b/pdfminer/pdfdocument.py index e9f91ad9..6898759f 100644 --- a/pdfminer/pdfdocument.py +++ b/pdfminer/pdfdocument.py @@ -157,12 +157,12 @@ def load(self, parser: PDFParser) -> None: break f = line.split(b" ") if len(f) != 2: - error_msg = "Trailer not found: {!r}: line={!r}".format(parser, line) + error_msg = f"Trailer not found: {parser!r}: line={line!r}" raise PDFNoValidXRef(error_msg) try: (start, nobjs) = map(int, f) except ValueError: - error_msg = "Invalid line: {!r}: line={!r}".format(parser, line) + error_msg = f"Invalid line: {parser!r}: line={line!r}" raise PDFNoValidXRef(error_msg) for objid in range(start, start + nobjs): try: @@ -829,7 +829,7 @@ def _getobj_parse(self, pos: int, objid: int) -> object: objid1 = x[-2] # #### end hack around malformed pdf files if objid1 != objid: - raise PDFSyntaxError("objid mismatch: {!r}={!r}".format(objid1, objid)) + raise PDFSyntaxError(f"objid mismatch: {objid1!r}={objid!r}") if kwd != KWD(b"obj"): raise PDFSyntaxError("Invalid object spec: offset=%r" % pos) diff --git a/pdfminer/pdffont.py b/pdfminer/pdffont.py index b521cd52..a32b55e4 100644 --- a/pdfminer/pdffont.py +++ b/pdfminer/pdffont.py @@ -1063,7 +1063,7 @@ def __init__( cid_ordering = resolve1(self.cidsysteminfo.get("Ordering", b"unknown")).decode( "latin1" ) - self.cidcoding = "{}-{}".format(cid_registry.strip(), cid_ordering.strip()) + self.cidcoding = f"{cid_registry.strip()}-{cid_ordering.strip()}" self.cmap: CMapBase = self.get_cmap_from_spec(spec, strict) try: diff --git a/pdfminer/pdfinterp.py b/pdfminer/pdfinterp.py index 79b351a6..3ff2c144 100644 --- a/pdfminer/pdfinterp.py +++ b/pdfminer/pdfinterp.py @@ -320,7 +320,7 @@ def do_keyword(self, pos: int, token: PSKeyword) -> None: try: (_, objs) = self.end_type("inline") if len(objs) % 2 != 0: - error_msg = "Invalid dictionary construct: {!r}".format(objs) + error_msg = f"Invalid dictionary construct: {objs!r}" raise PSTypeError(error_msg) d = {literal_name(k): v for (k, v) in choplist(2, objs)} (pos, data) = self.get_inline_data(pos + len(b"ID ")) diff --git a/pdfminer/pdftypes.py b/pdfminer/pdftypes.py index 635c5a66..a2dced55 100644 --- a/pdfminer/pdftypes.py +++ b/pdfminer/pdftypes.py @@ -1,7 +1,7 @@ import io import logging -import sys import zlib +from typing import Protocol from typing import ( TYPE_CHECKING, Any, @@ -42,25 +42,17 @@ LITERALS_JPX_DECODE = (LIT("JPXDecode"),) -if sys.version_info >= (3, 8): - from typing import Protocol +class DecipherCallable(Protocol): + """Fully typed a decipher callback, with optional parameter.""" - class DecipherCallable(Protocol): - """Fully typed a decipher callback, with optional parameter.""" - - def __call__( - self, - objid: int, - genno: int, - data: bytes, - attrs: Optional[Dict[str, Any]] = None, - ) -> bytes: - raise NotImplementedError - -else: # Fallback for older Python - from typing import Callable - - DecipherCallable = Callable[..., bytes] + def __call__( + self, + objid: int, + genno: int, + data: bytes, + attrs: Optional[Dict[str, Any]] = None, + ) -> bytes: + raise NotImplementedError class PDFObject(PSObject): @@ -319,7 +311,7 @@ def decode(self) -> None: except zlib.error as e: if settings.STRICT: - error_msg = "Invalid zlib bytes: {!r}, {!r}".format(e, data) + error_msg = f"Invalid zlib bytes: {e!r}, {data!r}" raise PDFException(error_msg) try: diff --git a/pdfminer/psparser.py b/pdfminer/psparser.py index cbaba002..36172c7a 100755 --- a/pdfminer/psparser.py +++ b/pdfminer/psparser.py @@ -120,7 +120,7 @@ def intern(self, name: PSLiteral.NameType) -> _SymbolT: def literal_name(x: object) -> Any: if not isinstance(x, PSLiteral): if settings.STRICT: - raise PSTypeError("Literal required: {!r}".format(x)) + raise PSTypeError(f"Literal required: {x!r}") else: name = x else: @@ -580,7 +580,7 @@ def start_type(self, pos: int, type: str) -> None: def end_type(self, type: str) -> Tuple[int, List[PSStackType[ExtraT]]]: if self.curtype != type: - raise PSTypeError("Type mismatch: {!r} != {!r}".format(self.curtype, type)) + raise PSTypeError(f"Type mismatch: {self.curtype!r} != {type!r}") objs = [obj for (_, obj) in self.curstack] (pos, self.curtype, self.curstack) = self.context.pop() log.debug("end_type: pos=%r, type=%r, objs=%r", pos, type, objs) diff --git a/pdfminer/utils.py b/pdfminer/utils.py index 93bcd449..fae1f643 100644 --- a/pdfminer/utils.py +++ b/pdfminer/utils.py @@ -41,7 +41,7 @@ AnyIO = Union[TextIO, BinaryIO] -class open_filename(object): +class open_filename: """ Context manager that allows opening a filename (str or pathlib.PurePath type is supported) and closes it on exit, @@ -91,7 +91,7 @@ def shorten_str(s: str, size: int) -> str: return s[:size] if len(s) > size: length = (size - 5) // 2 - return "{} ... {}".format(s[:length], s[-length:]) + return f"{s[:length]} ... {s[-length:]}" else: return s @@ -645,12 +645,12 @@ def enc(x: str) -> str: def bbox2str(bbox: Rect) -> str: (x0, y0, x1, y1) = bbox - return "{:.3f},{:.3f},{:.3f},{:.3f}".format(x0, y0, x1, y1) + return f"{x0:.3f},{y0:.3f},{x1:.3f},{y1:.3f}" def matrix2str(m: Matrix) -> str: (a, b, c, d, e, f) = m - return "[{:.2f},{:.2f},{:.2f},{:.2f}, ({:.2f},{:.2f})]".format(a, b, c, d, e, f) + return f"[{a:.2f},{b:.2f},{c:.2f},{d:.2f}, ({e:.2f},{f:.2f})]" def vecBetweenBoxes(obj1: "LTComponent", obj2: "LTComponent") -> Point: diff --git a/setup.py b/setup.py index 96595f0f..d50bc033 100644 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ from setuptools import setup root_dir = Path(__file__).parent -with open(root_dir / "README.md", "rt") as f: +with open(root_dir / "README.md") as f: readme = f.read() extras_require = { diff --git a/tests/test_pdfdocument.py b/tests/test_pdfdocument.py index e2643cd7..594e7abe 100644 --- a/tests/test_pdfdocument.py +++ b/tests/test_pdfdocument.py @@ -9,7 +9,7 @@ from tests.helpers import absolute_sample_path -class TestPdfDocument(object): +class TestPdfDocument: def test_get_zero_objid_raises_pdfobjectnotfound(self): with open(absolute_sample_path("simple1.pdf"), "rb") as in_file: parser = PDFParser(in_file) diff --git a/tests/test_pdfpage.py b/tests/test_pdfpage.py index c3fe86c2..a99a2f47 100644 --- a/tests/test_pdfpage.py +++ b/tests/test_pdfpage.py @@ -4,7 +4,7 @@ from tests.helpers import absolute_sample_path -class TestPdfPage(object): +class TestPdfPage: def test_page_labels(self): path = absolute_sample_path("contrib/pagelabels.pdf") expected_labels = ["iii", "iv", "1", "2", "1"] diff --git a/tests/test_tools_dumppdf.py b/tests/test_tools_dumppdf.py index 971c3d07..735556e2 100644 --- a/tests/test_tools_dumppdf.py +++ b/tests/test_tools_dumppdf.py @@ -11,9 +11,9 @@ def run(filename, options=None): absolute_path = absolute_sample_path(filename) with TemporaryFilePath() as output_file_name: if options: - s = "dumppdf -o %s %s %s" % (output_file_name, options, absolute_path) + s = "dumppdf -o {} {} {}".format(output_file_name, options, absolute_path) else: - s = "dumppdf -o %s %s" % (output_file_name, absolute_path) + s = "dumppdf -o {} {}".format(output_file_name, absolute_path) dumppdf.main(s.split(" ")[1:]) diff --git a/tests/test_tools_pdf2txt.py b/tests/test_tools_pdf2txt.py index ccbe55f9..a6e0ee1a 100644 --- a/tests/test_tools_pdf2txt.py +++ b/tests/test_tools_pdf2txt.py @@ -12,9 +12,9 @@ def run(sample_path, options=None): absolute_path = absolute_sample_path(sample_path) with TemporaryFilePath() as output_file_name: if options: - s = "pdf2txt -o{} {} {}".format(output_file_name, options, absolute_path) + s = f"pdf2txt -o{output_file_name} {options} {absolute_path}" else: - s = "pdf2txt -o{} {}".format(output_file_name, absolute_path) + s = f"pdf2txt -o{output_file_name} {absolute_path}" pdf2txt.main(s.split(" ")[1:]) diff --git a/tests/test_utils.py b/tests/test_utils.py index 160b02b4..af37ce9a 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -71,7 +71,7 @@ def given_plane_with_one_object(object_size=50, gridsize=50): return plane, obj -class TestFunctions(object): +class TestFunctions: def test_shorten_str(self): s = shorten_str("Hello there World", 15) assert s == "Hello ... World" diff --git a/tools/conv_afm.py b/tools/conv_afm.py index f666ee18..2d663cc1 100755 --- a/tools/conv_afm.py +++ b/tools/conv_afm.py @@ -36,7 +36,7 @@ def main(argv): print("# -*- python -*-") print("FONT_METRICS = {") for (fontname, (props, chars)) in fonts.items(): - print(" {!r}: {!r},".format(fontname, (props, chars))) + print(f" {fontname!r}: {(props, chars)!r},") print("}") return 0 diff --git a/tools/dumppdf.py b/tools/dumppdf.py index 6e19e275..b870b400 100755 --- a/tools/dumppdf.py +++ b/tools/dumppdf.py @@ -196,7 +196,7 @@ def resolve_dest(dest: object) -> Any: dest = resolve_dest(action["D"]) pageno = pages[dest[0].objid] s = escape(title) - outfp.write('\n'.format(level, s)) + outfp.write(f'\n') if dest is not None: outfp.write("") dumpxml(outfp, dest) @@ -310,7 +310,7 @@ def create_parser() -> ArgumentParser: "--version", "-v", action="version", - version="pdfminer.six v{}".format(pdfminer.__version__), + version=f"pdfminer.six v{pdfminer.__version__}", ) parser.add_argument( "--debug", diff --git a/tools/pdf2txt.py b/tools/pdf2txt.py index fe158593..1aaee573 100755 --- a/tools/pdf2txt.py +++ b/tools/pdf2txt.py @@ -22,7 +22,7 @@ def float_or_disabled(x: str) -> Optional[float]: try: return float(x) except ValueError: - raise argparse.ArgumentTypeError("invalid float value: {}".format(x)) + raise argparse.ArgumentTypeError(f"invalid float value: {x}") def extract_text( @@ -41,7 +41,7 @@ def extract_text( output_dir: Optional[str] = None, debug: bool = False, disable_caching: bool = False, - **kwargs: Any + **kwargs: Any, ) -> AnyIO: if not files: raise PDFValueError("Must provide files to work upon!") @@ -78,7 +78,7 @@ def create_parser() -> argparse.ArgumentParser: "--version", "-v", action="version", - version="pdfminer.six v{}".format(pdfminer.__version__), + version=f"pdfminer.six v{pdfminer.__version__}", ) parser.add_argument( "--debug",