From 99cddf78a2c1760c4fe45a3a78374eb9268aad1f Mon Sep 17 00:00:00 2001 From: Jillian Vogel Date: Wed, 30 Nov 2022 19:45:54 +1030 Subject: [PATCH] feat: adds superset docker services and open edx integrations * install custom requirements for mysql and OAuth2 * configure Superset to use the Open edX mysql service * install custom oauth2 manager and course permissions plugin * init OAuth2 application on LMS --- README.rst | 16 +- tutorsuperset/plugin.py | 134 ++++++++++++- .../superset/apps/docker/docker-bootstrap.sh | 54 ++++++ .../superset/apps/docker/docker-init.sh | 79 ++++++++ .../apps/docker/requirements-local.txt | 2 + .../openedx_sso_security_manager.py | 182 ++++++++++++++++++ .../apps/pythonpath/superset_config.py | 125 ++++++++++++ .../apps/pythonpath/superset_config_docker.py | 76 ++++++++ .../templates/superset/tasks/init.sh | 9 + 9 files changed, 670 insertions(+), 7 deletions(-) create mode 100755 tutorsuperset/templates/superset/apps/docker/docker-bootstrap.sh create mode 100755 tutorsuperset/templates/superset/apps/docker/docker-init.sh create mode 100644 tutorsuperset/templates/superset/apps/docker/requirements-local.txt create mode 100644 tutorsuperset/templates/superset/apps/pythonpath/openedx_sso_security_manager.py create mode 100644 tutorsuperset/templates/superset/apps/pythonpath/superset_config.py create mode 100644 tutorsuperset/templates/superset/apps/pythonpath/superset_config_docker.py create mode 100644 tutorsuperset/templates/superset/tasks/init.sh diff --git a/README.rst b/README.rst index 7387eb1..ba438df 100644 --- a/README.rst +++ b/README.rst @@ -6,7 +6,7 @@ Installation :: - pip install git+https://github.com/opencraft/tutor-contrib-superset + pip install git+https://github.com/open-craft/tutor-contrib-superset Usage ----- @@ -14,9 +14,21 @@ Usage :: tutor plugins enable superset + tutor local start superset-launcher + + # Initialize superset data + tutor local start superset-init + + # Set up SSO + tutor dev init --limit 
superset_service + + +Connect to Superset's UI on the configured port (default is `:8088`): + + http://local.overhang.io:8088 License ------- -This software is licensed under the terms of the AGPLv3. \ No newline at end of file +This software is licensed under the terms of the AGPLv3. diff --git a/tutorsuperset/plugin.py b/tutorsuperset/plugin.py index 34331f5..c535a5d 100644 --- a/tutorsuperset/plugin.py +++ b/tutorsuperset/plugin.py @@ -17,6 +17,21 @@ # Each new setting is a pair: (setting_name, default_value). # Prefix your setting names with 'SUPERSET_'. ("SUPERSET_VERSION", __version__), + ("SUPERSET_TAG", "latest-dev"), + ("SUPERSET_HOST", "{{ LMS_HOST }}"), + ("SUPERSET_PORT", "8088"), + ("SUPERSET_DB_DIALECT", "mysql"), + ("SUPERSET_DB_HOST", "{{ MYSQL_HOST }}"), + ("SUPERSET_DB_PORT", "{{ MYSQL_PORT }}"), + ("SUPERSET_DB_NAME", "superset"), + ("SUPERSET_DB_USERNAME", "superset"), + ("SUPERSET_OAUTH2_BASE_URL", "{% if ENABLE_HTTPS %}https{% else %}http{% endif %}://{{ LMS_HOST }}"), + ("SUPERSET_OAUTH2_ACCESS_TOKEN_URL", "{{ SUPERSET_OAUTH2_BASE_URL }}/oauth2/access_token/"), + ("SUPERSET_OAUTH2_AUTHORIZE_URL", "{{ SUPERSET_OAUTH2_BASE_URL }}/oauth2/authorize/"), + ("SUPERSET_OPENEDX_USERNAME_URL", "{{ SUPERSET_OAUTH2_BASE_URL }}/api/user/v1/me"), + ("SUPERSET_OPENEDX_USER_PROFILE_URL", "{{ SUPERSET_OAUTH2_BASE_URL }}/api/user/v1/accounts/{username}"), + ("SUPERSET_OPENEDX_COURSES_LIST_URL", + "{{ SUPERSET_OAUTH2_BASE_URL }}/api/courses/v1/courses/?permissions={permission}&username={username}"), ] ) @@ -27,7 +42,10 @@ # Each new setting is a pair: (setting_name, unique_generated_value). # Prefix your setting names with 'SUPERSET_'. 
# For example: - # ("SUPERSET_SECRET_KEY", "{{ 24|random_string }}"), + ("SUPERSET_SECRET_KEY", "{{ 24|random_string }}"), + ("SUPERSET_DB_PASSWORD", "{{ 24|random_string }}"), + ("SUPERSET_OAUTH2_CLIENT_ID", "{{ 16|random_string }}"), + ("SUPERSET_OAUTH2_CLIENT_SECRET", "{{ 16|random_string }}"), ] ) @@ -46,10 +64,10 @@ ######################################## # To run the script from templates/superset/tasks/myservice/init, add: -# hooks.Filters.COMMANDS_INIT.add_item(( -# "myservice", -# ("superset", "tasks", "myservice", "init"), -# )) +hooks.Filters.COMMANDS_INIT.add_item(( + "superset_service", + ("superset", "tasks", "init.sh"), +)) ######################################## @@ -100,6 +118,111 @@ ], ) +# docker-compose statements shared between the superset services +SUPERSET_DOCKER_COMPOSE_SHARED = """ + image: apache/superset:{{ SUPERSET_TAG }} + environment: + DATABASE_DIALECT: {{ SUPERSET_DB_DIALECT }} + DATABASE_HOST: {{ SUPERSET_DB_HOST }} + DATABASE_PORT: {{ SUPERSET_DB_PORT }} + DATABASE_DB: {{ SUPERSET_DB_NAME }} + DATABASE_PASSWORD: {{ SUPERSET_DB_PASSWORD }} + DATABASE_USER: {{ SUPERSET_DB_USERNAME }} + OPENEDX_MYSQL_HOST: {{ MYSQL_HOST }} + OPENEDX_MYSQL_PORT: {{ MYSQL_PORT }} + OPENEDX_MYSQL_DATABASE: {{ OPENEDX_MYSQL_DATABASE }} + OPENEDX_MYSQL_USERNAME: {{ OPENEDX_MYSQL_USERNAME }} + OPENEDX_MYSQL_PASSWORD: {{ OPENEDX_MYSQL_PASSWORD }} + OAUTH2_CLIENT_ID: {{ SUPERSET_OAUTH2_CLIENT_ID }} + OAUTH2_CLIENT_SECRET: {{ SUPERSET_OAUTH2_CLIENT_SECRET }} + OAUTH2_BASE_URL: {{ SUPERSET_OAUTH2_BASE_URL }} + OAUTH2_ACCESS_TOKEN_URL: {{ SUPERSET_OAUTH2_ACCESS_TOKEN_URL }} + OAUTH2_AUTHORIZE_URL: {{ SUPERSET_OAUTH2_AUTHORIZE_URL }} + OPENEDX_USERNAME_URL: {{ SUPERSET_OPENEDX_USERNAME_URL }} + OPENEDX_USER_PROFILE_URL: {{ SUPERSET_OPENEDX_USER_PROFILE_URL }} + OPENEDX_COURSES_LIST_URL: {{ SUPERSET_OPENEDX_COURSES_LIST_URL }} + SECRET_KEY: {{ SUPERSET_SECRET_KEY }} + PYTHONPATH: /app/pythonpath:/app/docker/pythonpath_dev
+ REDIS_HOST: superset_redis + REDIS_PORT: 6379 + FLASK_ENV: production + SUPERSET_ENV: production + SUPERSET_LOAD_EXAMPLES: 0 + CYPRESS_CONFIG: 0 + SUPERSET_PORT: {{ SUPERSET_PORT }} + user: root + restart: unless-stopped + depends_on: + - mysql + - superset_redis + volumes: + - ../../env/plugins/superset/apps/docker:/app/docker + - ../../env/plugins/superset/apps/pythonpath:/app/pythonpath + - ../../env/plugins/superset/apps/superset_home:/app/superset_home +""" + +hooks.Filters.ENV_PATCHES.add_item( + ( + "local-docker-compose-services", + f""" +# Modified from https://github.com/apache/superset/blob/969c963/docker-compose-non-dev.yml + +# FIXME -- should we use Open edX's redis service instead? +superset_redis: + image: redis:latest + restart: unless-stopped + volumes: + - ../../data/superset/redis:/data + +superset_service: + {SUPERSET_DOCKER_COMPOSE_SHARED} + command: ["bash", "/app/docker/docker-bootstrap.sh", "app-gunicorn"] + ports: + - "{{{{ SUPERSET_PORT }}}}:{{{{ SUPERSET_PORT }}}}" + +superset_worker: + {SUPERSET_DOCKER_COMPOSE_SHARED} + command: ["bash", "/app/docker/docker-bootstrap.sh", "worker"] + healthcheck: + test: ["CMD-SHELL", "celery inspect ping -A superset.tasks.celery_app:app -d celery@$$HOSTNAME"] + +superset_worker-beat: + {SUPERSET_DOCKER_COMPOSE_SHARED} + command: ["bash", "/app/docker/docker-bootstrap.sh", "beat"] + healthcheck: + disable: true + +# All the superset services we need to run together +superset-launcher: + image: apache/superset:{{{{ SUPERSET_TAG }}}} + command: ["bash"] + depends_on: + - superset_service + - superset_worker + - superset_worker-beat + +# One-off data initialization +superset-init: + {SUPERSET_DOCKER_COMPOSE_SHARED} + command: ["bash", "/app/docker/docker-init.sh"] + healthcheck: + disable: true + """ + ) +) + +hooks.Filters.ENV_PATCHES.add_item( + ( + "local-docker-compose-jobs-services", + """ +superset_service-job: + image: apache/superset:{{ SUPERSET_TAG }} + depends_on: + - superset-launcher + """ + ) +) +
######################################## # PATCH LOADING diff --git a/tutorsuperset/templates/superset/apps/docker/docker-bootstrap.sh b/tutorsuperset/templates/superset/apps/docker/docker-bootstrap.sh new file mode 100755 index 0000000..9ef31c0 --- /dev/null +++ b/tutorsuperset/templates/superset/apps/docker/docker-bootstrap.sh @@ -0,0 +1,54 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Copied from original: +# +# https://github.com/apache/superset/blob/969c963/docker/docker-bootstrap.sh + +set -eo pipefail + +REQUIREMENTS_LOCAL="/app/docker/requirements-local.txt" +# If Cypress run – overwrite the password for admin and export env variables +if [ "$CYPRESS_CONFIG" == "true" ]; then + export SUPERSET_CONFIG=tests.integration_tests.superset_test_config + export SUPERSET_TESTENV=true + export SUPERSET__SQLALCHEMY_DATABASE_URI=postgresql+psycopg2://superset:superset@db:5432/superset +fi +# +# Make sure we have dev requirements installed +# +if [ -f "${REQUIREMENTS_LOCAL}" ]; then + echo "Installing local overrides at ${REQUIREMENTS_LOCAL}" + pip install -r "${REQUIREMENTS_LOCAL}" +else + echo "Skipping local overrides" +fi + +if [[ "${1}" == "worker" ]]; then + echo "Starting Celery worker..." 
+ celery --app=superset.tasks.celery_app:app worker -Ofair -l INFO +elif [[ "${1}" == "beat" ]]; then + echo "Starting Celery beat..." + celery --app=superset.tasks.celery_app:app beat --pidfile /tmp/celerybeat.pid -l INFO -s "${SUPERSET_HOME}"/celerybeat-schedule +elif [[ "${1}" == "app" ]]; then + echo "Starting web app..." + flask run -p 8088 --with-threads --reload --debugger --host=0.0.0.0 +elif [[ "${1}" == "app-gunicorn" ]]; then + echo "Starting web app..." + /usr/bin/run-server.sh +fi diff --git a/tutorsuperset/templates/superset/apps/docker/docker-init.sh b/tutorsuperset/templates/superset/apps/docker/docker-init.sh new file mode 100755 index 0000000..618276d --- /dev/null +++ b/tutorsuperset/templates/superset/apps/docker/docker-init.sh @@ -0,0 +1,79 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +# Modified from original: +# +# https://github.com/apache/superset/blob/969c963/docker/docker-init.sh + +set -e + +STEP_CNT=4 + +echo_step() { +cat < str: + """Get the environment variable or raise exception.""" + try: + return os.environ[var_name] + except KeyError: + if default is not None: + return default + else: + error_msg = "The environment variable {} was missing, abort...".format( + var_name + ) + raise EnvironmentError(error_msg) + + +DATABASE_DIALECT = get_env_variable("DATABASE_DIALECT") +DATABASE_USER = get_env_variable("DATABASE_USER") +DATABASE_PASSWORD = get_env_variable("DATABASE_PASSWORD") +DATABASE_HOST = get_env_variable("DATABASE_HOST") +DATABASE_PORT = get_env_variable("DATABASE_PORT") +DATABASE_DB = get_env_variable("DATABASE_DB") + +# The SQLAlchemy connection string. +SQLALCHEMY_DATABASE_URI = "%s://%s:%s@%s:%s/%s" % ( + DATABASE_DIALECT, + DATABASE_USER, + DATABASE_PASSWORD, + DATABASE_HOST, + DATABASE_PORT, + DATABASE_DB, +) + +REDIS_HOST = get_env_variable("REDIS_HOST") +REDIS_PORT = get_env_variable("REDIS_PORT") +REDIS_CELERY_DB = get_env_variable("REDIS_CELERY_DB", "0") +REDIS_RESULTS_DB = get_env_variable("REDIS_RESULTS_DB", "1") + +RESULTS_BACKEND = FileSystemCache("/app/superset_home/sqllab") + +CACHE_CONFIG = { + "CACHE_TYPE": "redis", + "CACHE_DEFAULT_TIMEOUT": 300, + "CACHE_KEY_PREFIX": "superset_", + "CACHE_REDIS_HOST": REDIS_HOST, + "CACHE_REDIS_PORT": REDIS_PORT, + "CACHE_REDIS_DB": REDIS_RESULTS_DB, +} +DATA_CACHE_CONFIG = CACHE_CONFIG + + +class CeleryConfig(object): + BROKER_URL = f"redis://{REDIS_HOST}:{REDIS_PORT}/{REDIS_CELERY_DB}" + CELERY_IMPORTS = ("superset.sql_lab",) + CELERY_RESULT_BACKEND = f"redis://{REDIS_HOST}:{REDIS_PORT}/{REDIS_RESULTS_DB}" + CELERYD_LOG_LEVEL = "DEBUG" + CELERYD_PREFETCH_MULTIPLIER = 1 + CELERY_ACKS_LATE = False + CELERYBEAT_SCHEDULE = { + "reports.scheduler": { + "task": "reports.scheduler", + "schedule": crontab(minute="*", hour="*"), + }, + "reports.prune_log": { + "task": 
"reports.prune_log", + "schedule": crontab(minute=10, hour=0), + }, + } + + +CELERY_CONFIG = CeleryConfig + +FEATURE_FLAGS = {"ALERT_REPORTS": True} +ALERT_REPORTS_NOTIFICATION_DRY_RUN = True +WEBDRIVER_BASEURL = "http://superset:8088/" +# The base URL for the email report hyperlinks. +WEBDRIVER_BASEURL_USER_FRIENDLY = WEBDRIVER_BASEURL + +SQLLAB_CTAS_NO_LIMIT = True + +# +# Optionally import superset_config_docker.py (which will have been included on +# the PYTHONPATH) in order to allow for local settings to be overridden +# +try: + import superset_config_docker + from superset_config_docker import * # noqa + + print(f"Loaded your Docker configuration at " f"[{superset_config_docker.__file__}]") +except ImportError: + print("Using default Docker config...") diff --git a/tutorsuperset/templates/superset/apps/pythonpath/superset_config_docker.py b/tutorsuperset/templates/superset/apps/pythonpath/superset_config_docker.py new file mode 100644 index 0000000..e2b183c --- /dev/null +++ b/tutorsuperset/templates/superset/apps/pythonpath/superset_config_docker.py @@ -0,0 +1,76 @@ +import os +from flask_appbuilder.security.manager import AUTH_OAUTH + +# Application secret key + +SECRET_KEY = os.environ["SECRET_KEY"] + +# Credentials for connecting to the Open edX MySQL database +OPENEDX_DATABASE = { + 'host': os.environ["OPENEDX_MYSQL_HOST"], + 'port': os.environ["OPENEDX_MYSQL_PORT"], + 'database': os.environ["OPENEDX_MYSQL_DATABASE"], + 'user': os.environ["OPENEDX_MYSQL_USERNAME"], + 'password': os.environ["OPENEDX_MYSQL_PASSWORD"], +} + +# Set the authentication type to OAuth +AUTH_TYPE = AUTH_OAUTH + +OAUTH_PROVIDERS = [ + { 'name':'openedxsso', + 'token_key':'access_token', # Name of the token in the response of access_token_url + 'icon':'fa-address-card', # Icon for the provider + 'remote_app': { + 'client_id': os.environ["OAUTH2_CLIENT_ID"], + 'client_secret': os.environ["OAUTH2_CLIENT_SECRET"], + 'client_kwargs':{ + 'scope': 'read' # Scope for the Authorization + 
}, + 'access_token_method':'POST', # HTTP Method to call access_token_url + 'access_token_params':{ # Additional parameters for calls to access_token_url + 'client_id': os.environ["OAUTH2_CLIENT_ID"], + }, + 'access_token_headers':{ # Additional headers for calls to access_token_url + 'Authorization': 'Basic Base64EncodedClientIdAndSecret' + }, + 'api_base_url': os.environ["OAUTH2_BASE_URL"], + 'access_token_url': os.environ["OAUTH2_ACCESS_TOKEN_URL"], + 'authorize_url': os.environ["OAUTH2_AUTHORIZE_URL"], + } + } +] + +# Allow user self-registration, so that Flask users are created for authorized users +AUTH_USER_REGISTRATION = True + +# The default user self registration role +AUTH_USER_REGISTRATION_ROLE = "Gamma" + +# Should we replace ALL the user's roles each login, or only on registration? +AUTH_ROLES_SYNC_AT_LOGIN = True + +# map from the values of `userinfo["role_keys"]` to a list of Superset roles +# cf https://superset.apache.org/docs/security/#roles +AUTH_ROLES_MAPPING = { + "admin": ["Admin"], # Superusers + "alpha": ["Alpha"], # Global staff + "gamma": ["Gamma"], # Course staff + "openedx": ["Open edX"], # Open edX datastore, manually created + "public": ["Public"], # AKA anonymous users +} + +from openedx_sso_security_manager import OpenEdxSsoSecurityManager, can_view_courses +CUSTOM_SECURITY_MANAGER = OpenEdxSsoSecurityManager + + +# Enable use of variables in datasets/queries +FEATURE_FLAGS = { + "ALERT_REPORTS": True, + "ENABLE_TEMPLATE_PROCESSING": True, +} + +# Add this custom template processor which returns the list of courses the current user can access +JINJA_CONTEXT_ADDONS = { + 'can_view_courses': can_view_courses +} diff --git a/tutorsuperset/templates/superset/tasks/init.sh b/tutorsuperset/templates/superset/tasks/init.sh new file mode 100644 index 0000000..00c6845 --- /dev/null +++ b/tutorsuperset/templates/superset/tasks/init.sh @@ -0,0 +1,9 @@ +# Create a DOT application so Superset can use Open edX authentication +./manage.py lms
manage_user superset superset@apache +./manage.py lms create_dot_application \ + --grant-type authorization-code \ + --redirect-uris "{% if ENABLE_HTTPS %}https{% else %}http{% endif %}://{{ SUPERSET_HOST }}:{{ SUPERSET_PORT }}/oauth-authorized/openedxsso" \ + --client-id {{ SUPERSET_OAUTH2_CLIENT_ID }} \ + --client-secret {{ SUPERSET_OAUTH2_CLIENT_SECRET }} \ + --scopes user_id \ + superset-sso superset