From 6bfb0be80486c614cd60dce44c9fe7b3e6c76e3b Mon Sep 17 00:00:00 2001 From: larryhastings Date: Thu, 6 Oct 2022 12:23:20 -0700 Subject: [PATCH 01/11] gh-97943: PyFunction_GetAnnotations should return a borrowed reference. (#97949) --- .../2022-10-05-17-02-22.gh-issue-97943.LYAWlE.rst | 2 ++ Objects/funcobject.c | 7 +++++-- 2 files changed, 7 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2022-10-05-17-02-22.gh-issue-97943.LYAWlE.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-10-05-17-02-22.gh-issue-97943.LYAWlE.rst b/Misc/NEWS.d/next/Core and Builtins/2022-10-05-17-02-22.gh-issue-97943.LYAWlE.rst new file mode 100644 index 00000000000000..9b4a421a9d475a --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2022-10-05-17-02-22.gh-issue-97943.LYAWlE.rst @@ -0,0 +1,2 @@ +Bugfix: :func:`PyFunction_GetAnnotations` should return a borrowed +reference. It was returning a new reference. diff --git a/Objects/funcobject.c b/Objects/funcobject.c index 7f257a9986987b..ccc6d0b52eab68 100644 --- a/Objects/funcobject.c +++ b/Objects/funcobject.c @@ -311,7 +311,6 @@ func_get_annotation_dict(PyFunctionObject *op) } Py_SETREF(op->func_annotations, ann_dict); } - Py_INCREF(op->func_annotations); assert(PyDict_Check(op->func_annotations)); return op->func_annotations; } @@ -543,7 +542,11 @@ func_get_annotations(PyFunctionObject *op, void *Py_UNUSED(ignored)) if (op->func_annotations == NULL) return NULL; } - return func_get_annotation_dict(op); + PyObject *d = func_get_annotation_dict(op); + if (d) { + Py_INCREF(d); + } + return d; } static int From 2b5f1360ead9aa72ae00de59edfd6c229d13933f Mon Sep 17 00:00:00 2001 From: Terry Jan Reedy Date: Thu, 6 Oct 2022 15:24:17 -0400 Subject: [PATCH 02/11] gh-86482: Document assignment expression need for ()s (#23291) Co-authored-by: Jelle Zijlstra --- Doc/reference/expressions.rst | 7 +++++++ .../2020-11-15-02-08-43.bpo-42316.LqdkWK.rst | 1 + 2 files changed, 8 insertions(+) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2020-11-15-02-08-43.bpo-42316.LqdkWK.rst diff --git a/Doc/reference/expressions.rst b/Doc/reference/expressions.rst index a6ca55dafe5365..a661e03b173498 100644 --- a/Doc/reference/expressions.rst +++ b/Doc/reference/expressions.rst @@ -1766,6 +1766,13 @@ Or, when processing a file stream in chunks: while chunk := file.read(9000): process(chunk) +Assignment expressions must be surrounded by parentheses when used +as sub-expressions in slicing, conditional, lambda, +keyword-argument, and comprehension-if expressions +and in ``assert`` and ``with`` statements. +In all other places where they can be used, parentheses are not required, +including in ``if`` and ``while`` statements. + .. versionadded:: 3.8 See :pep:`572` for more details about assignment expressions. diff --git a/Misc/NEWS.d/next/Core and Builtins/2020-11-15-02-08-43.bpo-42316.LqdkWK.rst b/Misc/NEWS.d/next/Core and Builtins/2020-11-15-02-08-43.bpo-42316.LqdkWK.rst new file mode 100644 index 00000000000000..ea997800bf076e --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2020-11-15-02-08-43.bpo-42316.LqdkWK.rst @@ -0,0 +1 @@ +Document some places where an assignment expression needs parentheses. From 8af04cdef202364541540ed67e204b71e2e759d0 Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Thu, 6 Oct 2022 15:25:24 -0400 Subject: [PATCH 03/11] gh-97781: Apply changes from importlib_metadata 5. (GH-97785) * gh-97781: Apply changes from importlib_metadata 5. * Apply changes from upstream * Apply changes from upstream. --- Doc/library/importlib.metadata.rst | 108 ++++++--- Lib/importlib/metadata/__init__.py | 211 +----------------- Lib/test/test_importlib/test_main.py | 10 - Lib/test/test_importlib/test_metadata_api.py | 56 ----- ...2-10-03-13-25-19.gh-issue-97781.gCLLef.rst | 5 + 5 files changed, 88 insertions(+), 302 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2022-10-03-13-25-19.gh-issue-97781.gCLLef.rst diff --git a/Doc/library/importlib.metadata.rst b/Doc/library/importlib.metadata.rst index a1af7a754ba4ef..094c2688a8cd87 100644 --- a/Doc/library/importlib.metadata.rst +++ b/Doc/library/importlib.metadata.rst @@ -13,21 +13,39 @@ **Source code:** :source:`Lib/importlib/metadata/__init__.py` -``importlib.metadata`` is a library that provides access to installed -package metadata, such as its entry points or its -top-level name. Built in part on Python's import system, this library +``importlib_metadata`` is a library that provides access to +the metadata of an installed `Distribution Package `_, +such as its entry points +or its top-level names (`Import Package `_\s, modules, if any). +Built in part on Python's import system, this library intends to replace similar functionality in the `entry point API`_ and `metadata API`_ of ``pkg_resources``. Along with :mod:`importlib.resources`, this package can eliminate the need to use the older and less efficient ``pkg_resources`` package. -By "installed package" we generally mean a third-party package installed into -Python's ``site-packages`` directory via tools such as `pip -`_. Specifically, -it means a package with either a discoverable ``dist-info`` or ``egg-info`` -directory, and metadata defined by :pep:`566` or its older specifications. -By default, package metadata can live on the file system or in zip archives on +``importlib_metadata`` operates on third-party *distribution packages* +installed into Python's ``site-packages`` directory via tools such as +`pip `_. +Specifically, it works with distributions with discoverable +``dist-info`` or ``egg-info`` directories, +and metadata defined by the `Core metadata specifications `_. + +.. important:: + + These are *not* necessarily equivalent to or correspond 1:1 with + the top-level *import package* names + that can be imported inside Python code. + One *distribution package* can contain multiple *import packages* + (and single modules), + and one top-level *import package* + may map to multiple *distribution packages* + if it is a namespace package. + You can use :ref:`package_distributions() ` + to get a mapping between them. + +By default, distribution metadata can live on the file system +or in zip archives on :data:`sys.path`. Through an extension mechanism, the metadata can live almost anywhere. @@ -37,12 +55,19 @@ anywhere. https://importlib-metadata.readthedocs.io/ The documentation for ``importlib_metadata``, which supplies a backport of ``importlib.metadata``. + This includes an `API reference + `__ + for this module's classes and functions, + as well as a `migration guide + `__ + for existing users of ``pkg_resources``. Overview ======== -Let's say you wanted to get the version string for a package you've installed +Let's say you wanted to get the version string for a +`Distribution Package `_ you've installed using ``pip``. We start by creating a virtual environment and installing something into it: @@ -151,11 +176,10 @@ for more information on entry points, their definition, and usage. The "selectable" entry points were introduced in ``importlib_metadata`` 3.6 and Python 3.10. Prior to those changes, ``entry_points`` accepted no parameters and always returned a dictionary of entry points, keyed -by group. For compatibility, if no parameters are passed to entry_points, -a ``SelectableGroups`` object is returned, implementing that dict -interface. In the future, calling ``entry_points`` with no parameters -will return an ``EntryPoints`` object. Users should rely on the selection -interface to retrieve entry points by group. +by group. With ``importlib_metadata`` 5.0 and Python 3.12, +``entry_points`` always returns an ``EntryPoints`` object. See +`backports.entry_points_selectable `_ +for compatibility options. .. _metadata: @@ -163,7 +187,8 @@ interface to retrieve entry points by group. Distribution metadata --------------------- -Every distribution includes some metadata, which you can extract using the +Every `Distribution Package `_ includes some metadata, +which you can extract using the ``metadata()`` function:: >>> wheel_metadata = metadata('wheel') # doctest: +SKIP @@ -201,7 +226,8 @@ all the metadata in a JSON-compatible form per :PEP:`566`:: Distribution versions --------------------- -The ``version()`` function is the quickest way to get a distribution's version +The ``version()`` function is the quickest way to get a +`Distribution Package `_'s version number, as a string:: >>> version('wheel') # doctest: +SKIP @@ -214,7 +240,8 @@ Distribution files ------------------ You can also get the full set of files contained within a distribution. The -``files()`` function takes a distribution package name and returns all of the +``files()`` function takes a `Distribution Package `_ name +and returns all of the files installed by this distribution. Each file object returned is a ``PackagePath``, a :class:`pathlib.PurePath` derived object with additional ``dist``, ``size``, and ``hash`` properties as indicated by the metadata. For example:: @@ -259,19 +286,24 @@ distribution is not known to have the metadata present. Distribution requirements ------------------------- -To get the full set of requirements for a distribution, use the ``requires()`` +To get the full set of requirements for a `Distribution Package `_, +use the ``requires()`` function:: >>> requires('wheel') # doctest: +SKIP ["pytest (>=3.0.0) ; extra == 'test'", "pytest-cov ; extra == 'test'"] -Package distributions ---------------------- +.. _package-distributions: +.. _import-distribution-package-mapping: + +Mapping import to distribution packages +--------------------------------------- -A convenience method to resolve the distribution or -distributions (in the case of a namespace package) for top-level -Python packages or modules:: +A convenience method to resolve the `Distribution Package `_ +name (or names, in the case of a namespace package) +that provide each importable top-level +Python module or `Import Package `_:: >>> packages_distributions() {'importlib_metadata': ['importlib-metadata'], 'yaml': ['PyYAML'], 'jaraco': ['jaraco.classes', 'jaraco.functools'], ...} @@ -285,7 +317,8 @@ Distributions While the above API is the most common and convenient usage, you can get all of that information from the ``Distribution`` class. A ``Distribution`` is an -abstract object that represents the metadata for a Python package. You can +abstract object that represents the metadata for +a Python `Distribution Package `_. You can get the ``Distribution`` instance:: >>> from importlib.metadata import distribution # doctest: +SKIP @@ -305,14 +338,16 @@ instance:: >>> dist.metadata['License'] # doctest: +SKIP 'MIT' -The full set of available metadata is not described here. See :pep:`566` -for additional details. +The full set of available metadata is not described here. +See the `Core metadata specifications `_ for additional details. Distribution Discovery ====================== -By default, this package provides built-in support for discovery of metadata for file system and zip file packages. This metadata finder search defaults to ``sys.path``, but varies slightly in how it interprets those values from how other import machinery does. In particular: +By default, this package provides built-in support for discovery of metadata +for file system and zip file `Distribution Package `_\s. +This metadata finder search defaults to ``sys.path``, but varies slightly in how it interprets those values from how other import machinery does. In particular: - ``importlib.metadata`` does not honor :class:`bytes` objects on ``sys.path``. - ``importlib.metadata`` will incidentally honor :py:class:`pathlib.Path` objects on ``sys.path`` even though such values will be ignored for imports. @@ -321,15 +356,18 @@ By default, this package provides built-in support for discovery of metadata for Extending the search algorithm ============================== -Because package metadata is not available through :data:`sys.path` searches, or -package loaders directly, the metadata for a package is found through import -system :ref:`finders `. To find a distribution package's metadata, +Because `Distribution Package `_ metadata +is not available through :data:`sys.path` searches, or +package loaders directly, +the metadata for a distribution is found through import +system `finders`_. To find a distribution package's metadata, ``importlib.metadata`` queries the list of :term:`meta path finders ` on :data:`sys.meta_path`. -The default ``PathFinder`` for Python includes a hook that calls into -``importlib.metadata.MetadataPathFinder`` for finding distributions -loaded from typical file-system-based paths. +By default ``importlib_metadata`` installs a finder for distribution packages +found on the file system. +This finder doesn't actually find any *distributions*, +but it can find their metadata. The abstract class :py:class:`importlib.abc.MetaPathFinder` defines the interface expected of finders by Python's import system. @@ -358,4 +396,4 @@ a custom finder, return instances of this derived ``Distribution`` in the .. _`entry point API`: https://setuptools.readthedocs.io/en/latest/pkg_resources.html#entry-points .. _`metadata API`: https://setuptools.readthedocs.io/en/latest/pkg_resources.html#metadata-api -.. _`importlib_resources`: https://importlib-resources.readthedocs.io/en/latest/index.html +.. _`finders`: https://docs.python.org/3/reference/import.html#finders-and-loaders diff --git a/Lib/importlib/metadata/__init__.py b/Lib/importlib/metadata/__init__.py index b01de145c33671..40ab1a1aaac328 100644 --- a/Lib/importlib/metadata/__init__.py +++ b/Lib/importlib/metadata/__init__.py @@ -24,7 +24,7 @@ from importlib import import_module from importlib.abc import MetaPathFinder from itertools import starmap -from typing import List, Mapping, Optional, Union +from typing import List, Mapping, Optional __all__ = [ @@ -134,6 +134,7 @@ class DeprecatedTuple: 1 """ + # Do not remove prior to 2023-05-01 or Python 3.13 _warn = functools.partial( warnings.warn, "EntryPoint tuple interface is deprecated. Access members by name.", @@ -184,6 +185,10 @@ class EntryPoint(DeprecatedTuple): following the attr, and following any extras. """ + name: str + value: str + group: str + dist: Optional['Distribution'] = None def __init__(self, name, value, group): @@ -218,17 +223,6 @@ def _for(self, dist): vars(self).update(dist=dist) return self - def __iter__(self): - """ - Supply iter so one may construct dicts of EntryPoints by name. - """ - msg = ( - "Construction of dict of EntryPoints is deprecated in " - "favor of EntryPoints." - ) - warnings.warn(msg, DeprecationWarning) - return iter((self.name, self)) - def matches(self, **params): """ EntryPoint matches the given parameters. @@ -274,77 +268,7 @@ def __hash__(self): return hash(self._key()) -class DeprecatedList(list): - """ - Allow an otherwise immutable object to implement mutability - for compatibility. - - >>> recwarn = getfixture('recwarn') - >>> dl = DeprecatedList(range(3)) - >>> dl[0] = 1 - >>> dl.append(3) - >>> del dl[3] - >>> dl.reverse() - >>> dl.sort() - >>> dl.extend([4]) - >>> dl.pop(-1) - 4 - >>> dl.remove(1) - >>> dl += [5] - >>> dl + [6] - [1, 2, 5, 6] - >>> dl + (6,) - [1, 2, 5, 6] - >>> dl.insert(0, 0) - >>> dl - [0, 1, 2, 5] - >>> dl == [0, 1, 2, 5] - True - >>> dl == (0, 1, 2, 5) - True - >>> len(recwarn) - 1 - """ - - __slots__ = () - - _warn = functools.partial( - warnings.warn, - "EntryPoints list interface is deprecated. Cast to list if needed.", - DeprecationWarning, - stacklevel=2, - ) - - def _wrap_deprecated_method(method_name: str): # type: ignore - def wrapped(self, *args, **kwargs): - self._warn() - return getattr(super(), method_name)(*args, **kwargs) - - return method_name, wrapped - - locals().update( - map( - _wrap_deprecated_method, - '__setitem__ __delitem__ append reverse extend pop remove ' - '__iadd__ insert sort'.split(), - ) - ) - - def __add__(self, other): - if not isinstance(other, tuple): - self._warn() - other = tuple(other) - return self.__class__(tuple(self) + other) - - def __eq__(self, other): - if not isinstance(other, tuple): - self._warn() - other = tuple(other) - - return tuple(self).__eq__(other) - - -class EntryPoints(DeprecatedList): +class EntryPoints(tuple): """ An immutable collection of selectable EntryPoint objects. """ @@ -355,14 +279,6 @@ def __getitem__(self, name): # -> EntryPoint: """ Get the EntryPoint in self matching name. """ - if isinstance(name, int): - warnings.warn( - "Accessing entry points by index is deprecated. " - "Cast to tuple if needed.", - DeprecationWarning, - stacklevel=2, - ) - return super().__getitem__(name) try: return next(iter(self.select(name=name))) except StopIteration: @@ -386,10 +302,6 @@ def names(self): def groups(self): """ Return the set of all groups of all entry points. - - For coverage while SelectableGroups is present. - >>> EntryPoints().groups - set() """ return {ep.group for ep in self} @@ -405,101 +317,6 @@ def _from_text(text): ) -class Deprecated: - """ - Compatibility add-in for mapping to indicate that - mapping behavior is deprecated. - - >>> recwarn = getfixture('recwarn') - >>> class DeprecatedDict(Deprecated, dict): pass - >>> dd = DeprecatedDict(foo='bar') - >>> dd.get('baz', None) - >>> dd['foo'] - 'bar' - >>> list(dd) - ['foo'] - >>> list(dd.keys()) - ['foo'] - >>> 'foo' in dd - True - >>> list(dd.values()) - ['bar'] - >>> len(recwarn) - 1 - """ - - _warn = functools.partial( - warnings.warn, - "SelectableGroups dict interface is deprecated. Use select.", - DeprecationWarning, - stacklevel=2, - ) - - def __getitem__(self, name): - self._warn() - return super().__getitem__(name) - - def get(self, name, default=None): - self._warn() - return super().get(name, default) - - def __iter__(self): - self._warn() - return super().__iter__() - - def __contains__(self, *args): - self._warn() - return super().__contains__(*args) - - def keys(self): - self._warn() - return super().keys() - - def values(self): - self._warn() - return super().values() - - -class SelectableGroups(Deprecated, dict): - """ - A backward- and forward-compatible result from - entry_points that fully implements the dict interface. - """ - - @classmethod - def load(cls, eps): - by_group = operator.attrgetter('group') - ordered = sorted(eps, key=by_group) - grouped = itertools.groupby(ordered, by_group) - return cls((group, EntryPoints(eps)) for group, eps in grouped) - - @property - def _all(self): - """ - Reconstruct a list of all entrypoints from the groups. - """ - groups = super(Deprecated, self).values() - return EntryPoints(itertools.chain.from_iterable(groups)) - - @property - def groups(self): - return self._all.groups - - @property - def names(self): - """ - for coverage: - >>> SelectableGroups().names - set() - """ - return self._all.names - - def select(self, **params): - if not params: - return self - return self._all.select(**params) - - class PackagePath(pathlib.PurePosixPath): """A reference to a path in a package""" @@ -1013,27 +830,19 @@ def version(distribution_name): """ -def entry_points(**params) -> Union[EntryPoints, SelectableGroups]: +def entry_points(**params) -> EntryPoints: """Return EntryPoint objects for all installed packages. Pass selection parameters (group or name) to filter the result to entry points matching those properties (see EntryPoints.select()). - For compatibility, returns ``SelectableGroups`` object unless - selection parameters are supplied. In the future, this function - will return ``EntryPoints`` instead of ``SelectableGroups`` - even when no selection parameters are supplied. - - For maximum future compatibility, pass selection parameters - or invoke ``.select`` with parameters on the result. - - :return: EntryPoints or SelectableGroups for all installed packages. + :return: EntryPoints for all installed packages. """ eps = itertools.chain.from_iterable( dist.entry_points for dist in _unique(distributions()) ) - return SelectableGroups.load(eps).select(**params) + return EntryPoints(eps).select(**params) def files(distribution_name): diff --git a/Lib/test/test_importlib/test_main.py b/Lib/test/test_importlib/test_main.py index d9d067c4b23d66..30b68b6ae7d86e 100644 --- a/Lib/test/test_importlib/test_main.py +++ b/Lib/test/test_importlib/test_main.py @@ -1,8 +1,6 @@ import re -import json import pickle import unittest -import warnings import importlib.metadata try: @@ -260,14 +258,6 @@ def test_hashable(self): """EntryPoints should be hashable""" hash(self.ep) - def test_json_dump(self): - """ - json should not expect to be able to dump an EntryPoint - """ - with self.assertRaises(Exception): - with warnings.catch_warnings(record=True): - json.dumps(self.ep) - def test_module(self): assert self.ep.module == 'value' diff --git a/Lib/test/test_importlib/test_metadata_api.py b/Lib/test/test_importlib/test_metadata_api.py index 69c78e9820c044..71c47e62d27124 100644 --- a/Lib/test/test_importlib/test_metadata_api.py +++ b/Lib/test/test_importlib/test_metadata_api.py @@ -124,62 +124,6 @@ def test_entry_points_missing_name(self): def test_entry_points_missing_group(self): assert entry_points(group='missing') == () - def test_entry_points_dict_construction(self): - """ - Prior versions of entry_points() returned simple lists and - allowed casting those lists into maps by name using ``dict()``. - Capture this now deprecated use-case. - """ - with suppress_known_deprecation() as caught: - eps = dict(entry_points(group='entries')) - - assert 'main' in eps - assert eps['main'] == entry_points(group='entries')['main'] - - # check warning - expected = next(iter(caught)) - assert expected.category is DeprecationWarning - assert "Construction of dict of EntryPoints is deprecated" in str(expected) - - def test_entry_points_by_index(self): - """ - Prior versions of Distribution.entry_points would return a - tuple that allowed access by index. - Capture this now deprecated use-case - See python/importlib_metadata#300 and bpo-44246. - """ - eps = distribution('distinfo-pkg').entry_points - with suppress_known_deprecation() as caught: - eps[0] - - # check warning - expected = next(iter(caught)) - assert expected.category is DeprecationWarning - assert "Accessing entry points by index is deprecated" in str(expected) - - def test_entry_points_groups_getitem(self): - """ - Prior versions of entry_points() returned a dict. Ensure - that callers using '.__getitem__()' are supported but warned to - migrate. - """ - with suppress_known_deprecation(): - entry_points()['entries'] == entry_points(group='entries') - - with self.assertRaises(KeyError): - entry_points()['missing'] - - def test_entry_points_groups_get(self): - """ - Prior versions of entry_points() returned a dict. Ensure - that callers using '.get()' are supported but warned to - migrate. - """ - with suppress_known_deprecation(): - entry_points().get('missing', 'default') == 'default' - entry_points().get('entries', 'default') == entry_points()['entries'] - entry_points().get('missing', ()) == () - def test_entry_points_allows_no_attributes(self): ep = entry_points().select(group='entries', name='main') with self.assertRaises(AttributeError): diff --git a/Misc/NEWS.d/next/Library/2022-10-03-13-25-19.gh-issue-97781.gCLLef.rst b/Misc/NEWS.d/next/Library/2022-10-03-13-25-19.gh-issue-97781.gCLLef.rst new file mode 100644 index 00000000000000..8c36d9c3afd2dd --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-10-03-13-25-19.gh-issue-97781.gCLLef.rst @@ -0,0 +1,5 @@ +Removed deprecated interfaces in ``importlib.metadata`` (entry points +accessed as dictionary, implicit dictionary construction of sequence of +``EntryPoint`` objects, mutablility of ``EntryPoints`` result, access of +entry point by index). ``entry_points`` now has a simpler, more +straightforward API (returning ``EntryPoints``). From effc25f7f25ae1ac053415addc584b050c022dcb Mon Sep 17 00:00:00 2001 From: Barry Warsaw Date: Thu, 6 Oct 2022 13:29:52 -0700 Subject: [PATCH 04/11] Add Pynche's move to the What's new in 3.11 (#97974) * Add Pynche's move to the What's new in 3.11 * Update Doc/whatsnew/3.11.rst Co-authored-by: Ezio Melotti Co-authored-by: Ezio Melotti --- Doc/whatsnew/3.11.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Doc/whatsnew/3.11.rst b/Doc/whatsnew/3.11.rst index dafbbb673f0ed6..6103da76a34d03 100644 --- a/Doc/whatsnew/3.11.rst +++ b/Doc/whatsnew/3.11.rst @@ -1667,6 +1667,10 @@ Removed Python's test suite." (Contributed by Victor Stinner in :issue:`46852`.) +* Pynche --- The Pythonically Natural Color and Hue Editor --- has been moved out + of ``Tools/scripts`` and is `being developed independently + `_ from the Python source tree. + Porting to Python 3.11 ====================== From 1c4728cc299953c61e0e1cdb077eefccfd555b87 Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Thu, 6 Oct 2022 15:35:53 -0500 Subject: [PATCH 05/11] gh-94590: add signatures to operator itemgetter, attrgetter, methodcaller (#94591) These were intentionally skipped when operator was updated to use the argument clinic: https://github.com/python/cpython/issues/64385#issuecomment-1093641466 However, by not using the argument clinic, they missed out on getting signatures. This is a narrow PR to update the docstrings so that `__text_signature__` can be extracted from them. Updating to use the argument clinic is beyond scope. `methodcaller` uses `*args, **kwargs` to match variadic names used elsewhere, including in `operator.call`. --- Modules/_operator.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/Modules/_operator.c b/Modules/_operator.c index 51ca74eeeaee21..77eabdb57af921 100644 --- a/Modules/_operator.c +++ b/Modules/_operator.c @@ -1162,8 +1162,7 @@ static PyMemberDef itemgetter_members[] = { }; PyDoc_STRVAR(itemgetter_doc, -"itemgetter(item, ...) --> itemgetter object\n\ -\n\ +"itemgetter(item, /, *items)\n--\n\n\ Return a callable object that fetches the given item(s) from its operand.\n\ After f = itemgetter(2), the call f(r) returns r[2].\n\ After g = itemgetter(2, 5, 3), the call g(r) returns (r[2], r[5], r[3])"); @@ -1523,8 +1522,7 @@ static PyMemberDef attrgetter_members[] = { }; PyDoc_STRVAR(attrgetter_doc, -"attrgetter(attr, ...) --> attrgetter object\n\ -\n\ +"attrgetter(attr, /, *attrs)\n--\n\n\ Return a callable object that fetches the given attribute(s) from its operand.\n\ After f = attrgetter('name'), the call f(r) returns r.name.\n\ After g = attrgetter('name', 'date'), the call g(r) returns (r.name, r.date).\n\ @@ -1775,8 +1773,7 @@ static PyMethodDef methodcaller_methods[] = { {NULL} }; PyDoc_STRVAR(methodcaller_doc, -"methodcaller(name, ...) --> methodcaller object\n\ -\n\ +"methodcaller(name, /, *args, **kwargs)\n--\n\n\ Return a callable object that calls the given method on its operand.\n\ After f = methodcaller('name'), the call f(r) returns r.name().\n\ After g = methodcaller('name', 'date', foo=1), the call g(r) returns\n\ From 993de50e44ce275ad96857e73e144c556eea68b0 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade Date: Thu, 6 Oct 2022 13:58:41 -0700 Subject: [PATCH 06/11] Docs: pin sphinx-lint (GH-97992) --- Doc/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/requirements.txt b/Doc/requirements.txt index be058733fcf4d7..960ac54d7b91ef 100644 --- a/Doc/requirements.txt +++ b/Doc/requirements.txt @@ -10,7 +10,7 @@ blurb # sphinx-lint 0.6.2 yields many default role errors due to the new regular # expression used for default role detection, so we don't use the version # until the errors are fixed. -sphinx-lint<1,!=0.6.2 +sphinx-lint==0.6.1 # The theme used by the documentation is stored separately, so we need # to install that as well. From f8edc6ff531bb98858185857513371f14519ed1d Mon Sep 17 00:00:00 2001 From: Brett Cannon Date: Thu, 6 Oct 2022 14:01:06 -0700 Subject: [PATCH 07/11] gh-97850: Remove the open issues section from the import reference (#97935) Remove the open issues section from the import reference Tracking in https://github.com/python/cpython/issues/97850 instead. --- Doc/reference/import.rst | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/Doc/reference/import.rst b/Doc/reference/import.rst index 58f4ef897bdb7d..b7a53cd0886f29 100644 --- a/Doc/reference/import.rst +++ b/Doc/reference/import.rst @@ -1025,25 +1025,6 @@ and ``__main__.__spec__`` is set accordingly, they're still considered to populate the ``__main__`` namespace, and not during normal import. -Open issues -=========== - -XXX It would be really nice to have a diagram. - -XXX * (import_machinery.rst) how about a section devoted just to the -attributes of modules and packages, perhaps expanding upon or supplanting the -related entries in the data model reference page? - -XXX runpy, pkgutil, et al in the library manual should all get "See Also" -links at the top pointing to the new import system section. - -XXX Add more explanation regarding the different ways in which -``__main__`` is initialized? - -XXX Add more info on ``__main__`` quirks/pitfalls (i.e. copy from -:pep:`395`). - - References ========== From e1c4d56fdde28728c37de855edbb463fa0d7f95d Mon Sep 17 00:00:00 2001 From: Brett Cannon Date: Thu, 6 Oct 2022 15:40:22 -0700 Subject: [PATCH 08/11] gh-65961: Do not rely solely on `__cached__` (GH-97990) Make sure `__spec__.cached` (at minimum) can be used. --- Doc/c-api/import.rst | 9 +++ Doc/library/runpy.rst | 9 +++ Doc/whatsnew/3.12.rst | 4 +- Lib/cProfile.py | 8 ++- Lib/importlib/_bootstrap_external.py | 28 +++++--- Lib/inspect.py | 21 +----- Lib/profile.py | 8 ++- .../test_importlib/import_/test_helpers.py | 71 +++++++++++++++++++ Lib/test/test_inspect.py | 3 + Lib/test/test_pydoc.py | 2 +- ...2-10-06-17-59-22.gh-issue-65961.SXlQnI.rst | 2 + 11 files changed, 130 insertions(+), 35 deletions(-) create mode 100644 Lib/test/test_importlib/import_/test_helpers.py create mode 100644 Misc/NEWS.d/next/Library/2022-10-06-17-59-22.gh-issue-65961.SXlQnI.rst diff --git a/Doc/c-api/import.rst b/Doc/c-api/import.rst index 0922956c607bc3..a51619db6d3d97 100644 --- a/Doc/c-api/import.rst +++ b/Doc/c-api/import.rst @@ -150,6 +150,11 @@ Importing Modules See also :c:func:`PyImport_ExecCodeModuleEx` and :c:func:`PyImport_ExecCodeModuleWithPathnames`. + .. versionchanged:: 3.12 + The setting of :attr:`__cached__` and :attr:`__loader__` is + deprecated. See :class:`~importlib.machinery.ModuleSpec` for + alternatives. + .. c:function:: PyObject* PyImport_ExecCodeModuleEx(const char *name, PyObject *co, const char *pathname) @@ -167,6 +172,10 @@ Importing Modules .. versionadded:: 3.3 + .. versionchanged:: 3.12 + Setting :attr:`__cached__` is deprecated. See + :class:`~importlib.machinery.ModuleSpec` for alternatives. + .. c:function:: PyObject* PyImport_ExecCodeModuleWithPathnames(const char *name, PyObject *co, const char *pathname, const char *cpathname) diff --git a/Doc/library/runpy.rst b/Doc/library/runpy.rst index 26a4f1435214e8..501f4ddf5a3e3f 100644 --- a/Doc/library/runpy.rst +++ b/Doc/library/runpy.rst @@ -93,6 +93,11 @@ The :mod:`runpy` module provides two functions: run this way, as well as ensuring the real module name is always accessible as ``__spec__.name``. + .. versionchanged:: 3.12 + The setting of ``__cached__``, ``__loader__``, and + ``__package__`` are deprecated. See + :class:`~importlib.machinery.ModuleSpec` for alternatives. + .. function:: run_path(path_name, init_globals=None, run_name=None) .. index:: @@ -163,6 +168,10 @@ The :mod:`runpy` module provides two functions: case where ``__main__`` is imported from a valid sys.path entry rather than being executed directly. + .. versionchanged:: 3.12 + The setting of ``__cached__``, ``__loader__``, and + ``__package__`` are deprecated. + .. seealso:: :pep:`338` -- Executing modules as scripts diff --git a/Doc/whatsnew/3.12.rst b/Doc/whatsnew/3.12.rst index 507ba35221467e..d18c31fbe9866e 100644 --- a/Doc/whatsnew/3.12.rst +++ b/Doc/whatsnew/3.12.rst @@ -280,8 +280,8 @@ Pending Removal in Python 3.14 * Creating :c:data:`immutable types ` with mutable bases using the C API. -* ``__package__`` will cease to be set or taken into consideration by - the import system (:gh:`97879`). +* ``__package__`` and ``__cached__`` will cease to be set or taken + into consideration by the import system (:gh:`97879`). Pending Removal in Future Versions diff --git a/Lib/cProfile.py b/Lib/cProfile.py index 9fc97883020840..f7000a8bfa0ddb 100755 --- a/Lib/cProfile.py +++ b/Lib/cProfile.py @@ -7,6 +7,7 @@ __all__ = ["run", "runctx", "Profile"] import _lsprof +import importlib.machinery import profile as _pyprofile # ____________________________________________________________ @@ -169,9 +170,12 @@ def main(): sys.path.insert(0, os.path.dirname(progname)) with open(progname, 'rb') as fp: code = compile(fp.read(), progname, 'exec') + spec = importlib.machinery.ModuleSpec(name='__main__', loader=None, + origin=progname) globs = { - '__file__': progname, - '__name__': '__main__', + '__spec__': spec, + '__file__': spec.origin, + '__name__': spec.name, '__package__': None, '__cached__': None, } diff --git a/Lib/importlib/_bootstrap_external.py b/Lib/importlib/_bootstrap_external.py index b3c31b9659d849..efda49382540c5 100644 --- a/Lib/importlib/_bootstrap_external.py +++ b/Lib/importlib/_bootstrap_external.py @@ -182,6 +182,16 @@ def _path_isabs(path): return path.startswith(path_separators) +def _path_abspath(path): + """Replacement for os.path.abspath.""" + if not _path_isabs(path): + for sep in path_separators: + path = path.removeprefix(f".{sep}") + return _path_join(_os.getcwd(), path) + else: + return path + + def _write_atomic(path, data, mode=0o666): """Best-effort function to write data to a path atomically. Be prepared to handle a FileExistsError if concurrent writing of the @@ -494,8 +504,7 @@ def cache_from_source(path, debug_override=None, *, optimization=None): # make it absolute (`C:\Somewhere\Foo\Bar`), then make it root-relative # (`Somewhere\Foo\Bar`), so we end up placing the bytecode file in an # unambiguous `C:\Bytecode\Somewhere\Foo\Bar\`. - if not _path_isabs(head): - head = _path_join(_os.getcwd(), head) + head = _path_abspath(head) # Strip initial drive from a Windows path. We know we have an absolute # path here, so the second part of the check rules out a POSIX path that @@ -808,11 +817,10 @@ def spec_from_file_location(name, location=None, *, loader=None, pass else: location = _os.fspath(location) - if not _path_isabs(location): - try: - location = _path_join(_os.getcwd(), location) - except OSError: - pass + try: + location = _path_abspath(location) + except OSError: + pass # If the location is on the filesystem, but doesn't actually exist, # we could return None here, indicating that the location is not @@ -1564,10 +1572,8 @@ def __init__(self, path, *loader_details): # Base (directory) path if not path or path == '.': self.path = _os.getcwd() - elif not _path_isabs(path): - self.path = _path_join(_os.getcwd(), path) else: - self.path = path + self.path = _path_abspath(path) self._path_mtime = -1 self._path_cache = set() self._relaxed_path_cache = set() @@ -1717,6 +1723,8 @@ def _fix_up_module(ns, name, pathname, cpathname=None): loader = SourceFileLoader(name, pathname) if not spec: spec = spec_from_file_location(name, pathname, loader=loader) + if cpathname: + spec.cached = _path_abspath(cpathname) try: ns['__spec__'] = spec ns['__loader__'] = loader diff --git a/Lib/inspect.py b/Lib/inspect.py index 498ee7ab9eaf8a..8a107a894909eb 100644 --- a/Lib/inspect.py +++ b/Lib/inspect.py @@ -281,30 +281,15 @@ def get_annotations(obj, *, globals=None, locals=None, eval_str=False): # ----------------------------------------------------------- type-checking def ismodule(object): - """Return true if the object is a module. - - Module objects provide these attributes: - __cached__ pathname to byte compiled file - __doc__ documentation string - __file__ filename (missing for built-in modules)""" + """Return true if the object is a module.""" return isinstance(object, types.ModuleType) def isclass(object): - """Return true if the object is a class. - - Class objects provide these attributes: - __doc__ documentation string - __module__ name of module in which this class was defined""" + """Return true if the object is a class.""" return isinstance(object, type) def ismethod(object): - """Return true if the object is an instance method. - - Instance method objects provide these attributes: - __doc__ documentation string - __name__ name with which this method was defined - __func__ function object containing implementation of method - __self__ instance to which this method is bound""" + """Return true if the object is an instance method.""" return isinstance(object, types.MethodType) def ismethoddescriptor(object): diff --git a/Lib/profile.py b/Lib/profile.py index d8599fb4eebd66..453e56285c510c 100755 --- a/Lib/profile.py +++ b/Lib/profile.py @@ -24,6 +24,7 @@ # governing permissions and limitations under the License. +import importlib.machinery import sys import time import marshal @@ -589,9 +590,12 @@ def main(): sys.path.insert(0, os.path.dirname(progname)) with open(progname, 'rb') as fp: code = compile(fp.read(), progname, 'exec') + spec = importlib.machinery.ModuleSpec(name='__main__', loader=None, + origin=progname) globs = { - '__file__': progname, - '__name__': '__main__', + '__spec__': spec, + '__file__': spec.origin, + '__name__': spec.name, '__package__': None, '__cached__': None, } diff --git a/Lib/test/test_importlib/import_/test_helpers.py b/Lib/test/test_importlib/import_/test_helpers.py new file mode 100644 index 00000000000000..90df56f09fe52d --- /dev/null +++ b/Lib/test/test_importlib/import_/test_helpers.py @@ -0,0 +1,71 @@ +"""Tests for helper functions used by import.c .""" + +from importlib import _bootstrap_external, machinery +import os.path +import unittest + +from .. import util + + +class FixUpModuleTests: + + def test_no_loader_but_spec(self): + loader = object() + name = "hello" + path = "hello.py" + spec = machinery.ModuleSpec(name, loader) + ns = {"__spec__": spec} + _bootstrap_external._fix_up_module(ns, name, path) + + expected = {"__spec__": spec, "__loader__": loader, "__file__": path, + "__cached__": None} + self.assertEqual(ns, expected) + + def test_no_loader_no_spec_but_sourceless(self): + name = "hello" + path = "hello.py" + ns = {} + _bootstrap_external._fix_up_module(ns, name, path, path) + + expected = {"__file__": path, "__cached__": path} + + for key, val in expected.items(): + with self.subTest(f"{key}: {val}"): + self.assertEqual(ns[key], val) + + spec = ns["__spec__"] + self.assertIsInstance(spec, machinery.ModuleSpec) + self.assertEqual(spec.name, name) + self.assertEqual(spec.origin, os.path.abspath(path)) + self.assertEqual(spec.cached, os.path.abspath(path)) + self.assertIsInstance(spec.loader, machinery.SourcelessFileLoader) + self.assertEqual(spec.loader.name, name) + self.assertEqual(spec.loader.path, path) + self.assertEqual(spec.loader, ns["__loader__"]) + + def test_no_loader_no_spec_but_source(self): + name = "hello" + path = "hello.py" + ns = {} + _bootstrap_external._fix_up_module(ns, name, path) + + expected = {"__file__": path, "__cached__": None} + + for key, val in expected.items(): + with self.subTest(f"{key}: {val}"): + self.assertEqual(ns[key], val) + + spec = ns["__spec__"] + self.assertIsInstance(spec, machinery.ModuleSpec) + self.assertEqual(spec.name, name) + self.assertEqual(spec.origin, os.path.abspath(path)) + self.assertIsInstance(spec.loader, machinery.SourceFileLoader) + self.assertEqual(spec.loader.name, name) + self.assertEqual(spec.loader.path, path) + self.assertEqual(spec.loader, ns["__loader__"]) + + +FrozenFixUpModuleTests, SourceFixUpModuleTests = util.test_both(FixUpModuleTests) + +if __name__ == "__main__": + unittest.main() diff --git a/Lib/test/test_inspect.py b/Lib/test/test_inspect.py index be9f29e04ae110..710b609ce550d6 100644 --- a/Lib/test/test_inspect.py +++ b/Lib/test/test_inspect.py @@ -4358,8 +4358,11 @@ def test_details(self): 'unittest', '--details') output = out.decode() # Just a quick sanity check on the output + self.assertIn(module.__spec__.name, output) self.assertIn(module.__name__, output) + self.assertIn(module.__spec__.origin, output) self.assertIn(module.__file__, output) + self.assertIn(module.__spec__.cached, output) self.assertIn(module.__cached__, output) self.assertEqual(err, b'') diff --git a/Lib/test/test_pydoc.py b/Lib/test/test_pydoc.py index 8ab3289dd74006..cefc71cb5a7f54 100644 --- a/Lib/test/test_pydoc.py +++ b/Lib/test/test_pydoc.py @@ -702,7 +702,7 @@ def test_synopsis(self): def test_synopsis_sourceless(self): os = import_helper.import_fresh_module('os') expected = os.__doc__.splitlines()[0] - filename = os.__cached__ + filename = os.__spec__.cached synopsis = pydoc.synopsis(filename) self.assertEqual(synopsis, expected) diff --git a/Misc/NEWS.d/next/Library/2022-10-06-17-59-22.gh-issue-65961.SXlQnI.rst b/Misc/NEWS.d/next/Library/2022-10-06-17-59-22.gh-issue-65961.SXlQnI.rst new file mode 100644 index 00000000000000..f708a75a50450e --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-10-06-17-59-22.gh-issue-65961.SXlQnI.rst @@ -0,0 +1,2 @@ +Do not rely solely on ``__cached__`` on modules; code will also support +``__spec__.cached``. From b9d2e8171696514e9226164005f7bf24bf69e66d Mon Sep 17 00:00:00 2001 From: Dong-hee Na Date: Fri, 7 Oct 2022 07:57:37 +0900 Subject: [PATCH 09/11] fixes gh-96078: os.sched_yield release the GIL while calling sched_yield(2). (gh-97965) --- .../2022-10-06-15-45-57.gh-issue-96078.fS-6mU.rst | 2 ++ Modules/posixmodule.c | 7 ++++++- 2 files changed, 8 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2022-10-06-15-45-57.gh-issue-96078.fS-6mU.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-10-06-15-45-57.gh-issue-96078.fS-6mU.rst b/Misc/NEWS.d/next/Core and Builtins/2022-10-06-15-45-57.gh-issue-96078.fS-6mU.rst new file mode 100644 index 00000000000000..d1f949c6e13adc --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2022-10-06-15-45-57.gh-issue-96078.fS-6mU.rst @@ -0,0 +1,2 @@ +:func:`os.sched_yield` now release the GIL while calling sched_yield(2). +Patch by Dong-hee Na. diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index cbdc259737583c..a72d57771c2294 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -7075,8 +7075,13 @@ static PyObject * os_sched_yield_impl(PyObject *module) /*[clinic end generated code: output=902323500f222cac input=e54d6f98189391d4]*/ { - if (sched_yield()) + int result; + Py_BEGIN_ALLOW_THREADS + result = sched_yield(); + Py_END_ALLOW_THREADS + if (result < 0) { return posix_error(); + } Py_RETURN_NONE; } From cbf0afd8a1474d68310331af9218606959d4cc22 Mon Sep 17 00:00:00 2001 From: Lysandros Nikolaou Date: Thu, 6 Oct 2022 16:07:17 -0700 Subject: [PATCH 10/11] gh-97973: Return all necessary information from the tokenizer (GH-97984) Right now, the tokenizer only returns type and two pointers to the start and end of the token. This PR modifies the tokenizer to return the type and set all of the necessary information, so that the parser does not have to this. --- ...2-10-06-20-41-29.gh-issue-97973.gB-xWi.rst | 1 + Parser/pegen.c | 54 ++--- Parser/pegen_errors.c | 5 +- Parser/tokenizer.c | 220 ++++++++++-------- Parser/tokenizer.h | 8 +- Python/Python-tokenize.c | 17 +- 6 files changed, 159 insertions(+), 146 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2022-10-06-20-41-29.gh-issue-97973.gB-xWi.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-10-06-20-41-29.gh-issue-97973.gB-xWi.rst b/Misc/NEWS.d/next/Core and Builtins/2022-10-06-20-41-29.gh-issue-97973.gB-xWi.rst new file mode 100644 index 00000000000000..a0095a61ec32b7 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2022-10-06-20-41-29.gh-issue-97973.gB-xWi.rst @@ -0,0 +1 @@ +Modify the tokenizer to return all necessary information the parser needs to set location information in the AST nodes, so that the parser does not have to calculate those doing pointer arithmetic. diff --git a/Parser/pegen.c b/Parser/pegen.c index a5d123da51296c..1317606749b8c0 100644 --- a/Parser/pegen.c +++ b/Parser/pegen.c @@ -123,16 +123,18 @@ growable_comment_array_deallocate(growable_comment_array *arr) { } static int -_get_keyword_or_name_type(Parser *p, const char *name, int name_len) +_get_keyword_or_name_type(Parser *p, struct token *new_token) { + int name_len = new_token->end_col_offset - new_token->col_offset; assert(name_len > 0); + if (name_len >= p->n_keyword_lists || p->keywords[name_len] == NULL || p->keywords[name_len]->type == -1) { return NAME; } for (KeywordToken *k = p->keywords[name_len]; k != NULL && k->type != -1; k++) { - if (strncmp(k->str, name, name_len) == 0) { + if (strncmp(k->str, new_token->start, name_len) == 0) { return k->type; } } @@ -140,33 +142,26 @@ _get_keyword_or_name_type(Parser *p, const char *name, int name_len) } static int -initialize_token(Parser *p, Token *token, const char *start, const char *end, int token_type) { - assert(token != NULL); +initialize_token(Parser *p, Token *parser_token, struct token *new_token, int token_type) { + assert(parser_token != NULL); - token->type = (token_type == NAME) ? _get_keyword_or_name_type(p, start, (int)(end - start)) : token_type; - token->bytes = PyBytes_FromStringAndSize(start, end - start); - if (token->bytes == NULL) { + parser_token->type = (token_type == NAME) ? _get_keyword_or_name_type(p, new_token) : token_type; + parser_token->bytes = PyBytes_FromStringAndSize(new_token->start, new_token->end - new_token->start); + if (parser_token->bytes == NULL) { return -1; } - - if (_PyArena_AddPyObject(p->arena, token->bytes) < 0) { - Py_DECREF(token->bytes); + if (_PyArena_AddPyObject(p->arena, parser_token->bytes) < 0) { + Py_DECREF(parser_token->bytes); return -1; } - token->level = p->tok->level; - - const char *line_start = token_type == STRING ? p->tok->multi_line_start : p->tok->line_start; - int lineno = token_type == STRING ? p->tok->first_lineno : p->tok->lineno; - int end_lineno = p->tok->lineno; - - int col_offset = (start != NULL && start >= line_start) ? (int)(start - line_start) : -1; - int end_col_offset = (end != NULL && end >= p->tok->line_start) ? (int)(end - p->tok->line_start) : -1; - - token->lineno = lineno; - token->col_offset = p->tok->lineno == p->starting_lineno ? p->starting_col_offset + col_offset : col_offset; - token->end_lineno = end_lineno; - token->end_col_offset = p->tok->lineno == p->starting_lineno ? p->starting_col_offset + end_col_offset : end_col_offset; + parser_token->level = new_token->level; + parser_token->lineno = new_token->lineno; + parser_token->col_offset = p->tok->lineno == p->starting_lineno ? p->starting_col_offset + new_token->col_offset + : new_token->col_offset; + parser_token->end_lineno = new_token->end_lineno; + parser_token->end_col_offset = p->tok->lineno == p->starting_lineno ? p->starting_col_offset + new_token->end_col_offset + : new_token->end_col_offset; p->fill += 1; @@ -202,26 +197,25 @@ _resize_tokens_array(Parser *p) { int _PyPegen_fill_token(Parser *p) { - const char *start; - const char *end; - int type = _PyTokenizer_Get(p->tok, &start, &end); + struct token new_token; + int type = _PyTokenizer_Get(p->tok, &new_token); // Record and skip '# type: ignore' comments while (type == TYPE_IGNORE) { - Py_ssize_t len = end - start; + Py_ssize_t len = new_token.end_col_offset - new_token.col_offset; char *tag = PyMem_Malloc(len + 1); if (tag == NULL) { PyErr_NoMemory(); return -1; } - strncpy(tag, start, len); + strncpy(tag, new_token.start, len); tag[len] = '\0'; // Ownership of tag passes to the growable array if (!growable_comment_array_add(&p->type_ignore_comments, p->tok->lineno, tag)) { PyErr_NoMemory(); return -1; } - type = _PyTokenizer_Get(p->tok, &start, &end); + type = _PyTokenizer_Get(p->tok, &new_token); } // If we have reached the end and we are in single input mode we need to insert a newline and reset the parsing @@ -244,7 +238,7 @@ _PyPegen_fill_token(Parser *p) } Token *t = p->tokens[p->fill]; - return initialize_token(p, t, start, end, type); + return initialize_token(p, t, &new_token, type); } #if defined(Py_DEBUG) diff --git a/Parser/pegen_errors.c b/Parser/pegen_errors.c index 95bbd43dc32621..7738cbaf9ef39e 100644 --- a/Parser/pegen_errors.c +++ b/Parser/pegen_errors.c @@ -164,11 +164,10 @@ _PyPegen_tokenize_full_source_to_check_for_errors(Parser *p) { Py_ssize_t current_err_line = current_token->lineno; int ret = 0; + struct token new_token; for (;;) { - const char *start; - const char *end; - switch (_PyTokenizer_Get(p->tok, &start, &end)) { + switch (_PyTokenizer_Get(p->tok, &new_token)) { case ERRORTOKEN: if (p->tok->level != 0) { int error_lineno = p->tok->parenlinenostack[p->tok->level-1]; diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c index 3c37fd9c45a49e..c5d3e580247cc1 100644 --- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -36,6 +36,8 @@ /* Don't ever change this -- it would break the portability of Python code */ #define TABSIZE 8 +#define MAKE_TOKEN(token_type) token_setup(tok, token, token_type, p_start, p_end) + /* Forward */ static struct tok_state *tok_new(void); static int tok_nextc(struct tok_state *tok); @@ -1174,8 +1176,6 @@ syntaxerror_known_range(struct tok_state *tok, return ret; } - - static int indenterror(struct tok_state *tok) { @@ -1391,12 +1391,32 @@ tok_continuation_line(struct tok_state *tok) { } static int -tok_get(struct tok_state *tok, const char **p_start, const char **p_end) +token_setup(struct tok_state *tok, struct token *token, int type, const char *start, const char *end) +{ + assert((start == NULL && end == NULL) || (start != NULL && end != NULL)); + token->level = tok->level; + token->lineno = type == STRING ? tok->first_lineno : tok->lineno; + token->end_lineno = tok->lineno; + token->col_offset = -1; + token->end_col_offset = -1; + token->start = start; + token->end = end; + if (start != NULL && end != NULL) { + const char *line_start = type == STRING ? tok->multi_line_start : tok->line_start; + token->col_offset = (start >= line_start) ? (int)(start - line_start) : -1; + token->end_col_offset = (end >= tok->line_start) ? (int)(end - tok->line_start) : -1; + } + return type; +} + +static int +tok_get(struct tok_state *tok, struct token *token) { int c; int blankline, nonascii; - *p_start = *p_end = NULL; + const char *p_start = NULL; + const char *p_end = NULL; nextline: tok->start = NULL; blankline = 0; @@ -1426,7 +1446,7 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end) // the level of indentation of whatever comes next. cont_line_col = cont_line_col ? cont_line_col : col; if ((c = tok_continuation_line(tok)) == -1) { - return ERRORTOKEN; + return MAKE_TOKEN(ERRORTOKEN); } } else { @@ -1461,7 +1481,7 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end) if (col == tok->indstack[tok->indent]) { /* No change */ if (altcol != tok->altindstack[tok->indent]) { - return indenterror(tok); + return MAKE_TOKEN(indenterror(tok)); } } else if (col > tok->indstack[tok->indent]) { @@ -1469,10 +1489,10 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end) if (tok->indent+1 >= MAXINDENT) { tok->done = E_TOODEEP; tok->cur = tok->inp; - return ERRORTOKEN; + return MAKE_TOKEN(ERRORTOKEN); } if (altcol <= tok->altindstack[tok->indent]) { - return indenterror(tok); + return MAKE_TOKEN(indenterror(tok)); } tok->pendin++; tok->indstack[++tok->indent] = col; @@ -1488,10 +1508,10 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end) if (col != tok->indstack[tok->indent]) { tok->done = E_DEDENT; tok->cur = tok->inp; - return ERRORTOKEN; + return MAKE_TOKEN(ERRORTOKEN); } if (altcol != tok->altindstack[tok->indent]) { - return indenterror(tok); + return MAKE_TOKEN(indenterror(tok)); } } } @@ -1503,11 +1523,11 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end) if (tok->pendin != 0) { if (tok->pendin < 0) { tok->pendin++; - return DEDENT; + return MAKE_TOKEN(DEDENT); } else { tok->pendin--; - return INDENT; + return MAKE_TOKEN(INDENT); } } @@ -1587,34 +1607,34 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end) && ((unsigned char)ignore_end[0] >= 128 || Py_ISALNUM(ignore_end[0])))); if (is_type_ignore) { - *p_start = ignore_end; - *p_end = tok->cur; + p_start = ignore_end; + p_end = tok->cur; /* If this type ignore is the only thing on the line, consume the newline also. */ if (blankline) { tok_nextc(tok); tok->atbol = 1; } - return TYPE_IGNORE; + return MAKE_TOKEN(TYPE_IGNORE); } else { - *p_start = type_start; /* after type_comment_prefix */ - *p_end = tok->cur; - return TYPE_COMMENT; + p_start = type_start; + p_end = tok->cur; + return MAKE_TOKEN(TYPE_COMMENT); } } } } if (tok->done == E_INTERACT_STOP) { - return ENDMARKER; + return MAKE_TOKEN(ENDMARKER); } /* Check for EOF and errors now */ if (c == EOF) { if (tok->level) { - return ERRORTOKEN; + return MAKE_TOKEN(ERRORTOKEN); } - return tok->done == E_EOF ? ENDMARKER : ERRORTOKEN; + return MAKE_TOKEN(tok->done == E_EOF ? ENDMARKER : ERRORTOKEN); } /* Identifier (most frequent token!) */ @@ -1654,11 +1674,11 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end) } tok_backup(tok, c); if (nonascii && !verify_identifier(tok)) { - return ERRORTOKEN; + return MAKE_TOKEN(ERRORTOKEN); } - *p_start = tok->start; - *p_end = tok->cur; + p_start = tok->start; + p_end = tok->cur; /* async/await parsing block. */ if (tok->cur - tok->start == 5 && tok->start[0] == 'a') { @@ -1673,10 +1693,10 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end) if (!tok->async_hacks || tok->async_def) { /* Always recognize the keywords. */ if (memcmp(tok->start, "async", 5) == 0) { - return ASYNC; + return MAKE_TOKEN(ASYNC); } if (memcmp(tok->start, "await", 5) == 0) { - return AWAIT; + return MAKE_TOKEN(AWAIT); } } else if (memcmp(tok->start, "async", 5) == 0) { @@ -1684,13 +1704,11 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end) Look ahead one token to see if that is 'def'. */ struct tok_state ahead_tok; - const char *ahead_tok_start = NULL; - const char *ahead_tok_end = NULL; + struct token ahead_token; int ahead_tok_kind; memcpy(&ahead_tok, tok, sizeof(ahead_tok)); - ahead_tok_kind = tok_get(&ahead_tok, &ahead_tok_start, - &ahead_tok_end); + ahead_tok_kind = tok_get(&ahead_tok, &ahead_token); if (ahead_tok_kind == NAME && ahead_tok.cur - ahead_tok.start == 3 @@ -1700,12 +1718,12 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end) returning a plain NAME token, return ASYNC. */ tok->async_def_indent = tok->indent; tok->async_def = 1; - return ASYNC; + return MAKE_TOKEN(ASYNC); } } } - return NAME; + return MAKE_TOKEN(NAME); } /* Newline */ @@ -1714,15 +1732,15 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end) if (blankline || tok->level > 0) { goto nextline; } - *p_start = tok->start; - *p_end = tok->cur - 1; /* Leave '\n' out of the string */ + p_start = tok->start; + p_end = tok->cur - 1; /* Leave '\n' out of the string */ tok->cont_line = 0; if (tok->async_def) { /* We're somewhere inside an 'async def' function, and we've encountered a NEWLINE after its signature. */ tok->async_def_nl = 1; } - return NEWLINE; + return MAKE_TOKEN(NEWLINE); } /* Period or number starting with period? */ @@ -1733,9 +1751,9 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end) } else if (c == '.') { c = tok_nextc(tok); if (c == '.') { - *p_start = tok->start; - *p_end = tok->cur; - return ELLIPSIS; + p_start = tok->start; + p_end = tok->cur; + return MAKE_TOKEN(ELLIPSIS); } else { tok_backup(tok, c); @@ -1745,9 +1763,9 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end) else { tok_backup(tok, c); } - *p_start = tok->start; - *p_end = tok->cur; - return DOT; + p_start = tok->start; + p_end = tok->cur; + return MAKE_TOKEN(DOT); } /* Number */ @@ -1764,14 +1782,14 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end) } if (!isxdigit(c)) { tok_backup(tok, c); - return syntaxerror(tok, "invalid hexadecimal literal"); + return MAKE_TOKEN(syntaxerror(tok, "invalid hexadecimal literal")); } do { c = tok_nextc(tok); } while (isxdigit(c)); } while (c == '_'); if (!verify_end_of_number(tok, c, "hexadecimal")) { - return ERRORTOKEN; + return MAKE_TOKEN(ERRORTOKEN); } } else if (c == 'o' || c == 'O') { @@ -1783,12 +1801,12 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end) } if (c < '0' || c >= '8') { if (isdigit(c)) { - return syntaxerror(tok, - "invalid digit '%c' in octal literal", c); + return MAKE_TOKEN(syntaxerror(tok, + "invalid digit '%c' in octal literal", c)); } else { tok_backup(tok, c); - return syntaxerror(tok, "invalid octal literal"); + return MAKE_TOKEN(syntaxerror(tok, "invalid octal literal")); } } do { @@ -1796,11 +1814,11 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end) } while ('0' <= c && c < '8'); } while (c == '_'); if (isdigit(c)) { - return syntaxerror(tok, - "invalid digit '%c' in octal literal", c); + return MAKE_TOKEN(syntaxerror(tok, + "invalid digit '%c' in octal literal", c)); } if (!verify_end_of_number(tok, c, "octal")) { - return ERRORTOKEN; + return MAKE_TOKEN(ERRORTOKEN); } } else if (c == 'b' || c == 'B') { @@ -1812,12 +1830,11 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end) } if (c != '0' && c != '1') { if (isdigit(c)) { - return syntaxerror(tok, - "invalid digit '%c' in binary literal", c); + return MAKE_TOKEN(syntaxerror(tok, "invalid digit '%c' in binary literal", c)); } else { tok_backup(tok, c); - return syntaxerror(tok, "invalid binary literal"); + return MAKE_TOKEN(syntaxerror(tok, "invalid binary literal")); } } do { @@ -1825,11 +1842,10 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end) } while (c == '0' || c == '1'); } while (c == '_'); if (isdigit(c)) { - return syntaxerror(tok, - "invalid digit '%c' in binary literal", c); + return MAKE_TOKEN(syntaxerror(tok, "invalid digit '%c' in binary literal", c)); } if (!verify_end_of_number(tok, c, "binary")) { - return ERRORTOKEN; + return MAKE_TOKEN(ERRORTOKEN); } } else { @@ -1841,7 +1857,7 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end) c = tok_nextc(tok); if (!isdigit(c)) { tok_backup(tok, c); - return syntaxerror(tok, "invalid decimal literal"); + return MAKE_TOKEN(syntaxerror(tok, "invalid decimal literal")); } } if (c != '0') { @@ -1854,7 +1870,7 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end) nonzero = 1; c = tok_decimal_tail(tok); if (c == 0) { - return ERRORTOKEN; + return MAKE_TOKEN(ERRORTOKEN); } } if (c == '.') { @@ -1870,15 +1886,15 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end) else if (nonzero) { /* Old-style octal: now disallowed. */ tok_backup(tok, c); - return syntaxerror_known_range( + return MAKE_TOKEN(syntaxerror_known_range( tok, (int)(tok->start + 1 - tok->line_start), (int)(zeros_end - tok->line_start), "leading zeros in decimal integer " "literals are not permitted; " - "use an 0o prefix for octal integers"); + "use an 0o prefix for octal integers")); } if (!verify_end_of_number(tok, c, "decimal")) { - return ERRORTOKEN; + return MAKE_TOKEN(ERRORTOKEN); } } } @@ -1886,7 +1902,7 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end) /* Decimal */ c = tok_decimal_tail(tok); if (c == 0) { - return ERRORTOKEN; + return MAKE_TOKEN(ERRORTOKEN); } { /* Accept floating point numbers. */ @@ -1897,7 +1913,7 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end) if (isdigit(c)) { c = tok_decimal_tail(tok); if (c == 0) { - return ERRORTOKEN; + return MAKE_TOKEN(ERRORTOKEN); } } } @@ -1911,21 +1927,21 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end) c = tok_nextc(tok); if (!isdigit(c)) { tok_backup(tok, c); - return syntaxerror(tok, "invalid decimal literal"); + return MAKE_TOKEN(syntaxerror(tok, "invalid decimal literal")); } } else if (!isdigit(c)) { tok_backup(tok, c); if (!verify_end_of_number(tok, e, "decimal")) { - return ERRORTOKEN; + return MAKE_TOKEN(ERRORTOKEN); } tok_backup(tok, e); - *p_start = tok->start; - *p_end = tok->cur; - return NUMBER; + p_start = tok->start; + p_end = tok->cur; + return MAKE_TOKEN(NUMBER); } c = tok_decimal_tail(tok); if (c == 0) { - return ERRORTOKEN; + return MAKE_TOKEN(ERRORTOKEN); } } if (c == 'j' || c == 'J') { @@ -1933,18 +1949,18 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end) imaginary: c = tok_nextc(tok); if (!verify_end_of_number(tok, c, "imaginary")) { - return ERRORTOKEN; + return MAKE_TOKEN(ERRORTOKEN); } } else if (!verify_end_of_number(tok, c, "decimal")) { - return ERRORTOKEN; + return MAKE_TOKEN(ERRORTOKEN); } } } tok_backup(tok, c); - *p_start = tok->start; - *p_end = tok->cur; - return NUMBER; + p_start = tok->start; + p_end = tok->cur; + return MAKE_TOKEN(NUMBER); } letter_quote: @@ -1997,7 +2013,7 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end) if (c != '\n') { tok->done = E_EOFS; } - return ERRORTOKEN; + return MAKE_TOKEN(ERRORTOKEN); } else { syntaxerror(tok, "unterminated string literal (detected at" @@ -2005,7 +2021,7 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end) if (c != '\n') { tok->done = E_EOLS; } - return ERRORTOKEN; + return MAKE_TOKEN(ERRORTOKEN); } } if (c == quote) { @@ -2019,15 +2035,15 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end) } } - *p_start = tok->start; - *p_end = tok->cur; - return STRING; + p_start = tok->start; + p_end = tok->cur; + return MAKE_TOKEN(STRING); } /* Line continuation */ if (c == '\\') { if ((c = tok_continuation_line(tok)) == -1) { - return ERRORTOKEN; + return MAKE_TOKEN(ERRORTOKEN); } tok->cont_line = 1; goto again; /* Read next line */ @@ -2036,19 +2052,19 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end) /* Check for two-character token */ { int c2 = tok_nextc(tok); - int token = _PyToken_TwoChars(c, c2); - if (token != OP) { + int current_token = _PyToken_TwoChars(c, c2); + if (current_token != OP) { int c3 = tok_nextc(tok); - int token3 = _PyToken_ThreeChars(c, c2, c3); - if (token3 != OP) { - token = token3; + int current_token3 = _PyToken_ThreeChars(c, c2, c3); + if (current_token3 != OP) { + current_token = current_token3; } else { tok_backup(tok, c3); } - *p_start = tok->start; - *p_end = tok->cur; - return token; + p_start = tok->start; + p_end = tok->cur; + return MAKE_TOKEN(current_token); } tok_backup(tok, c2); } @@ -2059,7 +2075,7 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end) case '[': case '{': if (tok->level >= MAXLEVEL) { - return syntaxerror(tok, "too many nested parentheses"); + return MAKE_TOKEN(syntaxerror(tok, "too many nested parentheses")); } tok->parenstack[tok->level] = c; tok->parenlinenostack[tok->level] = tok->lineno; @@ -2070,7 +2086,7 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end) case ']': case '}': if (!tok->level) { - return syntaxerror(tok, "unmatched '%c'", c); + return MAKE_TOKEN(syntaxerror(tok, "unmatched '%c'", c)); } tok->level--; int opening = tok->parenstack[tok->level]; @@ -2079,16 +2095,16 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end) (opening == '{' && c == '}'))) { if (tok->parenlinenostack[tok->level] != tok->lineno) { - return syntaxerror(tok, + return MAKE_TOKEN(syntaxerror(tok, "closing parenthesis '%c' does not match " "opening parenthesis '%c' on line %d", - c, opening, tok->parenlinenostack[tok->level]); + c, opening, tok->parenlinenostack[tok->level])); } else { - return syntaxerror(tok, + return MAKE_TOKEN(syntaxerror(tok, "closing parenthesis '%c' does not match " "opening parenthesis '%c'", - c, opening); + c, opening)); } } break; @@ -2097,20 +2113,19 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end) if (!Py_UNICODE_ISPRINTABLE(c)) { char hex[9]; (void)PyOS_snprintf(hex, sizeof(hex), "%04X", c); - return syntaxerror(tok, "invalid non-printable character U+%s", hex); + return MAKE_TOKEN(syntaxerror(tok, "invalid non-printable character U+%s", hex)); } /* Punctuation character */ - *p_start = tok->start; - *p_end = tok->cur; - return _PyToken_OneChar(c); + p_start = tok->start; + p_end = tok->cur; + return MAKE_TOKEN(_PyToken_OneChar(c)); } int -_PyTokenizer_Get(struct tok_state *tok, - const char **p_start, const char **p_end) +_PyTokenizer_Get(struct tok_state *tok, struct token *token) { - int result = tok_get(tok, p_start, p_end); + int result = tok_get(tok, token); if (tok->decoding_erred) { result = ERRORTOKEN; tok->done = E_DECODE; @@ -2166,8 +2181,6 @@ _PyTokenizer_FindEncodingFilename(int fd, PyObject *filename) { struct tok_state *tok; FILE *fp; - const char *p_start = NULL; - const char *p_end = NULL; char *encoding = NULL; fp = fdopen_borrow(fd); @@ -2191,8 +2204,9 @@ _PyTokenizer_FindEncodingFilename(int fd, PyObject *filename) return encoding; } } + struct token token; while (tok->lineno < 2 && tok->done == E_OK) { - _PyTokenizer_Get(tok, &p_start, &p_end); + _PyTokenizer_Get(tok, &token); } fclose(fp); if (tok->encoding) { diff --git a/Parser/tokenizer.h b/Parser/tokenizer.h index 5ac64a99b7d661..5b8c7f314386ec 100644 --- a/Parser/tokenizer.h +++ b/Parser/tokenizer.h @@ -27,6 +27,12 @@ enum interactive_underflow_t { IUNDERFLOW_STOP, }; +struct token { + int level; + int lineno, col_offset, end_lineno, end_col_offset; + const char *start, *end; +}; + /* Tokenizer state */ struct tok_state { /* Input state; buf <= cur <= inp <= end */ @@ -94,7 +100,7 @@ extern struct tok_state *_PyTokenizer_FromUTF8(const char *, int); extern struct tok_state *_PyTokenizer_FromFile(FILE *, const char*, const char *, const char *); extern void _PyTokenizer_Free(struct tok_state *); -extern int _PyTokenizer_Get(struct tok_state *, const char **, const char **); +extern int _PyTokenizer_Get(struct tok_state *, struct token *); #define tok_dump _Py_tok_dump diff --git a/Python/Python-tokenize.c b/Python/Python-tokenize.c index c5124a6942e7f2..8daa9877254e2e 100644 --- a/Python/Python-tokenize.c +++ b/Python/Python-tokenize.c @@ -60,9 +60,8 @@ tokenizeriter_new_impl(PyTypeObject *type, const char *source) static PyObject * tokenizeriter_next(tokenizeriterobject *it) { - const char *start; - const char *end; - int type = _PyTokenizer_Get(it->tok, &start, &end); + struct token token; + int type = _PyTokenizer_Get(it->tok, &token); if (type == ERRORTOKEN && PyErr_Occurred()) { return NULL; } @@ -71,11 +70,11 @@ tokenizeriter_next(tokenizeriterobject *it) return NULL; } PyObject *str = NULL; - if (start == NULL || end == NULL) { + if (token.start == NULL || token.end == NULL) { str = PyUnicode_FromString(""); } else { - str = PyUnicode_FromStringAndSize(start, end - start); + str = PyUnicode_FromStringAndSize(token.start, token.end - token.start); } if (str == NULL) { return NULL; @@ -92,11 +91,11 @@ tokenizeriter_next(tokenizeriterobject *it) int end_lineno = it->tok->lineno; int col_offset = -1; int end_col_offset = -1; - if (start != NULL && start >= line_start) { - col_offset = (int)(start - line_start); + if (token.start != NULL && token.start >= line_start) { + col_offset = (int)(token.start - line_start); } - if (end != NULL && end >= it->tok->line_start) { - end_col_offset = (int)(end - it->tok->line_start); + if (token.end != NULL && token.end >= it->tok->line_start) { + end_col_offset = (int)(token.end - it->tok->line_start); } return Py_BuildValue("(NiiiiiN)", str, type, lineno, end_lineno, col_offset, end_col_offset, line); From 21a2d9ff550977f2668e2cf1cc15793bf27fa109 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Thu, 6 Oct 2022 16:20:01 -0700 Subject: [PATCH 11/11] GH-97002: Prevent `_PyInterpreterFrame`s from backing more than one `PyFrameObject` (GH-97996) --- Lib/test/test_frame.py | 65 +++++++++++++++++++ ...2-10-06-02-11-34.gh-issue-97002.Zvsk71.rst | 3 + Python/frame.c | 29 +++++++-- 3 files changed, 91 insertions(+), 6 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2022-10-06-02-11-34.gh-issue-97002.Zvsk71.rst diff --git a/Lib/test/test_frame.py b/Lib/test/test_frame.py index 5dda2fbfac374c..4b86a60d2f4c36 100644 --- a/Lib/test/test_frame.py +++ b/Lib/test/test_frame.py @@ -1,3 +1,4 @@ +import gc import re import sys import textwrap @@ -261,5 +262,69 @@ def gen(): """) assert_python_ok("-c", code) + @support.cpython_only + def test_sneaky_frame_object(self): + + def trace(frame, event, arg): + """ + Don't actually do anything, just force a frame object to be created. + """ + + def callback(phase, info): + """ + Yo dawg, I heard you like frames, so I'm allocating a frame while + you're allocating a frame, so you can have a frame while you have a + frame! + """ + nonlocal sneaky_frame_object + sneaky_frame_object = sys._getframe().f_back + # We're done here: + gc.callbacks.remove(callback) + + def f(): + while True: + yield + + old_threshold = gc.get_threshold() + old_callbacks = gc.callbacks[:] + old_enabled = gc.isenabled() + old_trace = sys.gettrace() + try: + # Stop the GC for a second while we set things up: + gc.disable() + # Create a paused generator: + g = f() + next(g) + # Move all objects to the oldest generation, and tell the GC to run + # on the *very next* allocation: + gc.collect() + gc.set_threshold(1, 0, 0) + # Okay, so here's the nightmare scenario: + # - We're tracing the resumption of a generator, which creates a new + # frame object. + # - The allocation of this frame object triggers a collection + # *before* the frame object is actually created. + # - During the collection, we request the exact same frame object. + # This test does it with a GC callback, but in real code it would + # likely be a trace function, weakref callback, or finalizer. + # - The collection finishes, and the original frame object is + # created. We now have two frame objects fighting over ownership + # of the same interpreter frame! + sys.settrace(trace) + gc.callbacks.append(callback) + sneaky_frame_object = None + gc.enable() + next(g) + # g.gi_frame should be the the frame object from the callback (the + # one that was *requested* second, but *created* first): + self.assertIs(g.gi_frame, sneaky_frame_object) + finally: + gc.set_threshold(*old_threshold) + gc.callbacks[:] = old_callbacks + sys.settrace(old_trace) + if old_enabled: + gc.enable() + + if __name__ == "__main__": unittest.main() diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-10-06-02-11-34.gh-issue-97002.Zvsk71.rst b/Misc/NEWS.d/next/Core and Builtins/2022-10-06-02-11-34.gh-issue-97002.Zvsk71.rst new file mode 100644 index 00000000000000..1f577e02e1fd8a --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2022-10-06-02-11-34.gh-issue-97002.Zvsk71.rst @@ -0,0 +1,3 @@ +Fix an issue where several frame objects could be backed by the same +interpreter frame, possibly leading to corrupted memory and hard crashes of +the interpreter. diff --git a/Python/frame.c b/Python/frame.c index 96566de63a78fd..89f084b110cb5a 100644 --- a/Python/frame.c +++ b/Python/frame.c @@ -37,14 +37,31 @@ _PyFrame_MakeAndSetFrameObject(_PyInterpreterFrame *frame) Py_XDECREF(error_type); Py_XDECREF(error_value); Py_XDECREF(error_traceback); + return NULL; } - else { - assert(frame->owner != FRAME_OWNED_BY_FRAME_OBJECT); - assert(frame->owner != FRAME_CLEARED); - f->f_frame = frame; - frame->frame_obj = f; - PyErr_Restore(error_type, error_value, error_traceback); + PyErr_Restore(error_type, error_value, error_traceback); + if (frame->frame_obj) { + // GH-97002: How did we get into this horrible situation? Most likely, + // allocating f triggered a GC collection, which ran some code that + // *also* created the same frame... while we were in the middle of + // creating it! See test_sneaky_frame_object in test_frame.py for a + // concrete example. + // + // Regardless, just throw f away and use that frame instead, since it's + // already been exposed to user code. It's actually a bit tricky to do + // this, since we aren't backed by a real _PyInterpreterFrame anymore. + // Just pretend that we have an owned, cleared frame so frame_dealloc + // doesn't make the situation worse: + f->f_frame = (_PyInterpreterFrame *)f->_f_frame_data; + f->f_frame->owner = FRAME_CLEARED; + f->f_frame->frame_obj = f; + Py_DECREF(f); + return frame->frame_obj; } + assert(frame->owner != FRAME_OWNED_BY_FRAME_OBJECT); + assert(frame->owner != FRAME_CLEARED); + f->f_frame = frame; + frame->frame_obj = f; return f; }