From e81810d83d5c9ace84905d36064d1e35ba5188cf Mon Sep 17 00:00:00 2001 From: Pablo Galindo Date: Wed, 24 May 2023 10:00:20 +0100 Subject: [PATCH 1/4] gh-104825: Remove implicit newline in the line attribute in tokens emitted in the tokenize module --- Lib/test/test_tokenize.py | 4 ++-- Lib/tokenize.py | 4 ++-- .../2023-05-24-09-59-56.gh-issue-104825.mQesie.rst | 2 ++ Python/Python-tokenize.c | 4 ++++ 4 files changed, 10 insertions(+), 4 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2023-05-24-09-59-56.gh-issue-104825.mQesie.rst diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py index 8e7ab3d4b7b578..fd9c919ce6a0d1 100644 --- a/Lib/test/test_tokenize.py +++ b/Lib/test/test_tokenize.py @@ -103,7 +103,7 @@ def k(x): e.exception.msg, 'unindent does not match any outer indentation level') self.assertEqual(e.exception.offset, 9) - self.assertEqual(e.exception.text, ' x += 5\n') + self.assertEqual(e.exception.text, ' x += 5') def test_int(self): # Ordinary integers and binary operators @@ -1157,7 +1157,7 @@ def readline(): # skip the initial encoding token and the end tokens tokens = list(_tokenize(readline(), encoding='utf-8'))[:-2] - expected_tokens = [TokenInfo(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"\n')] + expected_tokens = [TokenInfo(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')] self.assertEqual(tokens, expected_tokens, "bytes not decoded with encoding") diff --git a/Lib/tokenize.py b/Lib/tokenize.py index 911f0f12f9bb7e..d2cf1d5b038277 100644 --- a/Lib/tokenize.py +++ b/Lib/tokenize.py @@ -518,8 +518,8 @@ def error(message, filename=None, location=None): if args.exact: token_type = token.exact_type token_range = "%d,%d-%d,%d:" % (token.start + token.end) - print("%-20s%-15s%-15r" % - (token_range, tok_name[token_type], token.string)) + print("%-20s%-15s%-15r%-15r" % + (token_range, tok_name[token_type], token.string, token.line)) except IndentationError as err: line, column = err.args[1][1:3] error(err.args[0], filename, (line, column)) diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-05-24-09-59-56.gh-issue-104825.mQesie.rst b/Misc/NEWS.d/next/Core and Builtins/2023-05-24-09-59-56.gh-issue-104825.mQesie.rst new file mode 100644 index 00000000000000..caf5d3527085f3 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2023-05-24-09-59-56.gh-issue-104825.mQesie.rst @@ -0,0 +1,2 @@ +Tokens emitted by the :mod:`tokenize` module do not include an implicit +``\n`` character in the ``line`` attribute anymore. Patch by Pablo Galindo diff --git a/Python/Python-tokenize.c b/Python/Python-tokenize.c index f7e32d3af9a9f7..0023e303b96e83 100644 --- a/Python/Python-tokenize.c +++ b/Python/Python-tokenize.c @@ -123,6 +123,8 @@ _tokenizer_error(struct tok_state *tok) int result = 0; Py_ssize_t size = tok->inp - tok->buf; + assert(tok->buf[size-1] == '\n'); + size -= 1; // Remove the newline character from the end of the line error_line = PyUnicode_DecodeUTF8(tok->buf, size, "replace"); if (!error_line) { result = -1; @@ -193,6 +195,8 @@ tokenizeriter_next(tokenizeriterobject *it) } Py_ssize_t size = it->tok->inp - it->tok->buf; + assert(it->tok->buf[size-1] == '\n'); + size -= 1; // Remove the newline character from the end of the line PyObject *line = PyUnicode_DecodeUTF8(it->tok->buf, size, "replace"); if (line == NULL) { Py_DECREF(str); From aa7bcc3a32f25817aad6b4ff4b4580d319555231 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Wed, 24 May 2023 10:04:26 +0100 Subject: [PATCH 2/4] Update Lib/tokenize.py --- Lib/test/test_tabnanny.py | 8 ++++---- Lib/tokenize.py | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Lib/test/test_tabnanny.py b/Lib/test/test_tabnanny.py index aa700118f735d9..cc122cafc7985c 100644 --- a/Lib/test/test_tabnanny.py +++ b/Lib/test/test_tabnanny.py @@ -222,7 +222,7 @@ def test_when_nannynag_error_verbose(self): """ with TemporaryPyFile(SOURCE_CODES["nannynag_errored"]) as file_path: out = f"{file_path!r}: *** Line 3: trouble in tab city! ***\n" - out += "offending line: '\\tprint(\"world\")\\n'\n" + out += "offending line: '\\tprint(\"world\")'\n" out += "inconsistent use of tabs and spaces in indentation\n" tabnanny.verbose = 1 @@ -231,7 +231,7 @@ def test_when_nannynag_error_verbose(self): def test_when_nannynag_error(self): """A python source code file eligible for raising `tabnanny.NannyNag`.""" with TemporaryPyFile(SOURCE_CODES["nannynag_errored"]) as file_path: - out = f"{file_path} 3 '\\tprint(\"world\")\\n'\n" + out = f"{file_path} 3 '\\tprint(\"world\")'\n" self.verify_tabnanny_check(file_path, out=out) def test_when_no_file(self): @@ -341,7 +341,7 @@ def test_verbose_mode(self): """Should display more error information if verbose mode is on.""" with TemporaryPyFile(SOURCE_CODES["nannynag_errored"]) as path: stdout = textwrap.dedent( - "offending line: '\\tprint(\"world\")\\n'" + "offending line: '\\tprint(\"world\")'" ).strip() self.validate_cmd("-v", path, stdout=stdout, partial=True) @@ -349,6 +349,6 @@ def test_double_verbose_mode(self): """Should display detailed error information if double verbose is on.""" with TemporaryPyFile(SOURCE_CODES["nannynag_errored"]) as path: stdout = textwrap.dedent( - "offending line: '\\tprint(\"world\")\\n'" + "offending line: '\\tprint(\"world\")'" ).strip() self.validate_cmd("-vv", path, stdout=stdout, partial=True) diff --git a/Lib/tokenize.py b/Lib/tokenize.py index d2cf1d5b038277..911f0f12f9bb7e 100644 --- a/Lib/tokenize.py +++ b/Lib/tokenize.py @@ -518,8 +518,8 @@ def error(message, filename=None, location=None): if args.exact: token_type = token.exact_type token_range = "%d,%d-%d,%d:" % (token.start + token.end) - print("%-20s%-15s%-15r%-15r" % - (token_range, tok_name[token_type], token.string, token.line)) + print("%-20s%-15s%-15r" % + (token_range, tok_name[token_type], token.string)) except IndentationError as err: line, column = err.args[1][1:3] error(err.args[0], filename, (line, column)) From 4d39ad44778b9357ccb9b79c88d2f448b3e4e007 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Date: Wed, 24 May 2023 10:34:41 +0100 Subject: [PATCH 3/4] Fix IDLE tests --- Lib/idlelib/idle_test/test_editor.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/idlelib/idle_test/test_editor.py b/Lib/idlelib/idle_test/test_editor.py index 9296a6d235fbbe..ba59c40dc6dde5 100644 --- a/Lib/idlelib/idle_test/test_editor.py +++ b/Lib/idlelib/idle_test/test_editor.py @@ -201,8 +201,8 @@ def test_searcher(self): test_info = (# text, (block, indent)) ("", (None, None)), ("[1,", (None, None)), # TokenError - ("if 1:\n", ('if 1:\n', None)), - ("if 1:\n 2\n 3\n", ('if 1:\n', ' 2\n')), + ("if 1:\n", ('if 1:', None)), + ("if 1:\n 2\n 3\n", ('if 1:', ' 2')), ) for code, expected_pair in test_info: with self.subTest(code=code): From bddd4ccba736d27e295d40802e4f314f322e1abd Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sat, 1 Jul 2023 19:41:10 +0000 Subject: [PATCH 4/4] build(deps): bump mheap/github-action-required-labels from 4 to 5 Bumps [mheap/github-action-required-labels](https://github.com/mheap/github-action-required-labels) from 4 to 5. - [Release notes](https://github.com/mheap/github-action-required-labels/releases) - [Commits](https://github.com/mheap/github-action-required-labels/compare/v4...v5) --- updated-dependencies: - dependency-name: mheap/github-action-required-labels dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] --- .github/workflows/require-pr-label.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/require-pr-label.yml b/.github/workflows/require-pr-label.yml index 88aaea039f04f4..9327b43ae02710 100644 --- a/.github/workflows/require-pr-label.yml +++ b/.github/workflows/require-pr-label.yml @@ -15,7 +15,7 @@ jobs: timeout-minutes: 10 steps: - - uses: mheap/github-action-required-labels@v4 + - uses: mheap/github-action-required-labels@v5 with: mode: exactly count: 0