diff --git a/requirements/development.in b/requirements/development.in index efa332051ccf4..8f2379941e655 100644 --- a/requirements/development.in +++ b/requirements/development.in @@ -26,3 +26,4 @@ tableschema thrift>=0.11.0,<1.0.0 pygithub>=1.54.1,<2.0.0 progress>=1.5,<2 +pyinstrument>=4.0.2,<5 diff --git a/requirements/development.txt b/requirements/development.txt index 959c6466c25ab..be77b5847218f 100644 --- a/requirements/development.txt +++ b/requirements/development.txt @@ -1,4 +1,4 @@ -# SHA1:c470411e2e9cb04b412a94f80a6a9d870bece74d +# SHA1:1144991012e228fb2ef85afbf78a635e7d5a33f1 # # This file is autogenerated by pip-compile-multi # To update, run: @@ -54,6 +54,8 @@ pygithub==1.54.1 # via -r requirements/development.in pyhive[hive]==0.6.3 # via -r requirements/development.in +pyinstrument==4.0.2 + # via -r requirements/development.in requests==2.24.0 # via # pydruid diff --git a/setup.cfg b/setup.cfg index 1e16680ee0b4e..a4f48d820e982 100644 --- a/setup.cfg +++ b/setup.cfg @@ -30,7 +30,7 @@ combine_as_imports = true include_trailing_comma = true line_length = 88 known_first_party = superset -known_third_party =alembic,apispec,backoff,bleach,cachelib,celery,click,colorama,cron_descriptor,croniter,cryptography,dateutil,deprecation,flask,flask_appbuilder,flask_babel,flask_caching,flask_compress,flask_jwt_extended,flask_login,flask_migrate,flask_sqlalchemy,flask_talisman,flask_testing,flask_wtf,freezegun,geohash,geopy,graphlib,holidays,humanize,isodate,jinja2,jwt,markdown,markupsafe,marshmallow,marshmallow_enum,msgpack,numpy,pandas,parameterized,parsedatetime,pgsanity,pkg_resources,polyline,prison,progress,pyarrow,pyhive,pyparsing,pytest,pytest_mock,pytz,redis,requests,selenium,setuptools,simplejson,slack,sqlalchemy,sqlalchemy_utils,sqlparse,tabulate,typing_extensions,werkzeug,wtforms,wtforms_json,yaml +known_third_party 
=alembic,apispec,backoff,bleach,cachelib,celery,click,colorama,cron_descriptor,croniter,cryptography,dateutil,deprecation,flask,flask_appbuilder,flask_babel,flask_caching,flask_compress,flask_jwt_extended,flask_login,flask_migrate,flask_sqlalchemy,flask_talisman,flask_testing,flask_wtf,freezegun,geohash,geopy,graphlib,holidays,humanize,isodate,jinja2,jwt,markdown,markupsafe,marshmallow,marshmallow_enum,msgpack,numpy,pandas,parameterized,parsedatetime,pgsanity,pkg_resources,polyline,prison,progress,pyarrow,pyhive,pyinstrument,pyparsing,pytest,pytest_mock,pytz,redis,requests,selenium,setuptools,simplejson,slack,sqlalchemy,sqlalchemy_utils,sqlparse,tabulate,typing_extensions,werkzeug,wtforms,wtforms_json,yaml multi_line_output = 3 order_by_type = false diff --git a/superset/config.py b/superset/config.py index 6453b0ce04121..b75be326631e9 100644 --- a/superset/config.py +++ b/superset/config.py @@ -196,6 +196,10 @@ def _try_json_readsha( # pylint: disable=unused-argument DEBUG = os.environ.get("FLASK_ENV") == "development" FLASK_USE_RELOAD = True +# Enable profiling of Python calls. Turn this on and append ``?_instrument=1`` +# to the page to see the call stack. +PROFILING = False + # Superset allows server-side python stacktraces to be surfaced to the # user when this feature is on. This may has security implications # and it's more secure to turn it off in production settings. 
diff --git a/superset/extensions.py b/superset/extensions.py
index d46894447d40b..619de6a2346b6 100644
--- a/superset/extensions.py
+++ b/superset/extensions.py
@@ -32,6 +32,7 @@
 from superset.utils.encrypt import EncryptedFieldFactory
 from superset.utils.feature_flag_manager import FeatureFlagManager
 from superset.utils.machine_auth import MachineAuthProviderFactory
+from superset.utils.profiler import SupersetProfiler
 
 
 class ResultsBackendManager:
@@ -97,6 +98,19 @@ def get_manifest_files(self, bundle: str, asset_type: str) -> List[str]:
         return self.manifest.get(bundle, {}).get(asset_type, [])
 
 
+class ProfilingExtension:
+    """Extension object that installs ``SupersetProfiler`` as WSGI middleware."""
+
+    def __init__(self, interval: float = 1e-4) -> None:
+        # Stored until ``init_app`` passes it on to ``SupersetProfiler``.
+        self.interval = interval
+
+    def init_app(self, app: Flask) -> None:
+        """Replace ``app.wsgi_app`` with a profiler wrapping the current one."""
+        wrapped = SupersetProfiler(app.wsgi_app, self.interval)
+        app.wsgi_app = wrapped  # type: ignore
+
+
 APP_DIR = os.path.dirname(__file__)
 appbuilder = AppBuilder(update_perms=False)
 async_query_manager = AsyncQueryManager()
@@ -111,6 +125,7 @@ def get_manifest_files(self, bundle: str, asset_type: str) -> List[str]:
 machine_auth_provider_factory = MachineAuthProviderFactory()
 manifest_processor = UIManifestProcessor(APP_DIR)
 migrate = Migrate()
+profiling = ProfilingExtension()
 results_backend_manager = ResultsBackendManager()
 security_manager = LocalProxy(lambda: appbuilder.sm)
 talisman = Talisman()
diff --git a/superset/initialization/__init__.py b/superset/initialization/__init__.py
index 1d6298dc3eb3d..066ae3245c9fc 100644
--- a/superset/initialization/__init__.py
+++ b/superset/initialization/__init__.py
@@ -42,6 +42,7 @@
     machine_auth_provider_factory,
     manifest_processor,
     migrate,
+    profiling,
     results_backend_manager,
     talisman,
 )
@@ -566,6 +567,7 @@ def init_app(self) -> None:
         self.configure_db_encrypt()
         self.setup_db()
         self.configure_celery()
+        self.enable_profiling()
         self.setup_event_logger()
         self.setup_bundle_manifest()
         self.register_blueprints()
@@ -716,6 +718,10 @@ def register_blueprints(self) -> None:
     def setup_bundle_manifest(self) 
-> None:
         manifest_processor.init_app(self.superset_app)
 
+    def enable_profiling(self) -> None:
+        if self.config["PROFILING"]:
+            profiling.init_app(self.superset_app)
+
 
 class SupersetIndexView(IndexView):
     @expose("/")
diff --git a/superset/utils/profiler.py b/superset/utils/profiler.py
new file mode 100644
index 0000000000000..38de88e775f06
--- /dev/null
+++ b/superset/utils/profiler.py
@@ -0,0 +1,71 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from typing import Any, Callable
+
+from pyinstrument import Profiler
+from werkzeug.wrappers import Request, Response
+
+
+class SupersetProfiler:
+    """
+    WSGI middleware to instrument Superset.
+
+    To see the instrumentation for a given page, set `PROFILING=True`
+    in the config, and append `?_instrument=1` to the page.
+
+    Requests without that query parameter are passed through to the
+    wrapped application untouched.
+    """
+
+    def __init__(
+        self, app: Callable[[Any, Any], Any], interval: float = 0.0001,
+    ):
+        # ``app`` is the wrapped WSGI callable; ``interval`` is forwarded
+        # to ``Profiler(interval=...)`` for every profiled request.
+        self.app = app
+        self.interval = interval
+
+    @staticmethod
+    def _start_response(*_args: Any, **_kwargs: Any) -> Callable[[bytes], None]:
+        """Stand-in ``start_response`` that drops the status and headers."""
+        return lambda _data: None
+
+    @Request.application
+    def __call__(self, request: Request) -> Response:
+        """Profile the request when asked to, otherwise pass it through."""
+        if request.args.get("_instrument") != "1":
+            return Response.from_app(self.app, request.environ)
+
+        profiler = Profiler(interval=self.interval)
+
+        # Run the original request under the profiler. Its response is
+        # thrown away, but the body is still iterated and closed: a lazily
+        # generated body only does its work while being iterated, and
+        # PEP 3333 requires ``close()`` to be called when it is present.
+        with profiler:
+            body = self.app(request.environ, self._start_response)
+            try:
+                for _ in body:
+                    pass
+            finally:
+                close = getattr(body, "close", None)
+                if callable(close):
+                    close()
+
+        # return HTML profiling information
+        return Response(profiler.output_html(), mimetype="text/html")