Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Incorporate feedback from initial PR (prematurely merged to lyft-release-sp8) #7415

Merged
merged 6 commits into from
May 1, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 26 additions & 14 deletions superset/db_engine_specs.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
import re
import textwrap
import time
from typing import List, Tuple

from flask import g
from flask_babel import lazy_gettext as _
Expand Down Expand Up @@ -840,13 +841,13 @@ def _create_column_info(cls, column: RowProxy, name: str, data_type: str) -> dic
}

@classmethod
def _get_full_name(cls, names: list) -> str:
def _get_full_name(cls, names: List[Tuple[str, str]]) -> str:
"""
Get the full column name
:param names: list of all individual column names
:return: full column name
"""
return '.'.join(row_type[0] for row_type in names if row_type[0] is not None)
return '.'.join(column[0] for column in names if column[0])

@classmethod
def _has_nested_data_types(cls, component_type: str) -> bool:
Expand All @@ -862,20 +863,20 @@ def _has_nested_data_types(cls, component_type: str) -> bool:
or re.search(white_space_regex, component_type) is not None

@classmethod
def _split_data_type(cls, data_type: str, delimiter: str) -> list:
def _split_data_type(cls, data_type: str, delimiter: str) -> List[str]:
"""
Split data type based on given delimiter. Do not split the string if the
delimiter is enclosed in quotes
:param data_type: data type
:param delimiter: string separator (i.e. open parenthesis, closed parenthesis,
comma, whitespace)
:return:list of strings after breaking it by the delimiter
:return: list of strings after breaking it by the delimiter
"""
return re.split(
r'{}(?=(?:[^\"]*\"[^\"]*\")*[^\"]*$)'.format(delimiter), data_type)

@classmethod
def _parse_structural_column(cls, column: RowProxy, result: list) -> None:
def _parse_structural_column(cls, column: RowProxy, result: List[dict]) -> None:
"""
Parse a row or array column
:param column: column
Expand All @@ -885,7 +886,7 @@ def _parse_structural_column(cls, column: RowProxy, result: list) -> None:
# split on open parenthesis ( to get the structural
# data type and its component types
data_types = cls._split_data_type(full_data_type, r'\(')
stack: list = []
stack: List[Tuple[str, str]] = []
for data_type in data_types:
# split on closed parenthesis ) to track which component
# types belong to what structural data type
Expand Down Expand Up @@ -926,15 +927,16 @@ def _parse_structural_column(cls, column: RowProxy, result: list) -> None:
# We have an array of row objects (i.e. array(row(...)))
elif 'array' == inner_type or 'row' == inner_type:
# Push a dummy object to represent the structural data type
stack.append((None, inner_type))
stack.append(('', inner_type))
# We have an array of a basic data types(i.e. array(varchar)).
elif len(stack) > 0:
# Because it is an array of a basic data type. We have finished
# parsing the structural data type and can move on.
stack.pop()

@classmethod
def _show_columns(cls, inspector: Inspector, table_name: str, schema: str) -> list:
def _show_columns(
cls, inspector: Inspector, table_name: str, schema: str) -> List[RowProxy]:
"""
Show presto column names
:param inspector: object that performs database schema inspection
Expand All @@ -950,7 +952,8 @@ def _show_columns(cls, inspector: Inspector, table_name: str, schema: str) -> li
return columns

@classmethod
def get_columns(cls, inspector: Inspector, table_name: str, schema: str) -> list:
def get_columns(
cls, inspector: Inspector, table_name: str, schema: str) -> List[dict]:
"""
Get columns from a Presto data source. This includes handling row and
array data types
Expand All @@ -961,7 +964,7 @@ def get_columns(cls, inspector: Inspector, table_name: str, schema: str) -> list
(i.e. column name and data type)
"""
columns = cls._show_columns(inspector, table_name, schema)
result: list = []
result: List[dict] = []
for column in columns:
try:
# parse column if it is a row or array
Expand All @@ -971,7 +974,7 @@ def get_columns(cls, inspector: Inspector, table_name: str, schema: str) -> list
else: # otherwise column is a basic data type
column_type = presto_type_map[column.Type]()
except KeyError:
print('Did not recognize type {} of column {}'.format(
logging.info('Did not recognize type {} of column {}'.format(
column.Type, column.Column))
column_type = types.NullType
result.append(cls._create_column_info(column, column.Column, column_type))
Expand All @@ -980,7 +983,7 @@ def get_columns(cls, inspector: Inspector, table_name: str, schema: str) -> list
@classmethod
def select_star(cls, my_db, table_name: str, engine: Engine, schema: str = None,
limit: int = 100, show_cols: bool = False, indent: bool = True,
latest_partition: bool = True, cols: list = []) -> str:
latest_partition: bool = True, cols: List[dict] = []) -> str:
"""
Temporary method until we have a function that can handle row and array columns
"""
Expand All @@ -989,7 +992,7 @@ def select_star(cls, my_db, table_name: str, engine: Engine, schema: str = None,
dot_regex = r'\.(?=(?:[^\"]*\"[^\"]*\")*[^\"]*$)'
presto_cols = [
col for col in presto_cols if re.search(dot_regex, col['name']) is None]
return super(PrestoEngineSpec, cls).select_star(
return BaseEngineSpec.select_star(
my_db, table_name, engine, schema, limit,
show_cols, indent, latest_partition, presto_cols,
)
Expand Down Expand Up @@ -1504,7 +1507,8 @@ def handle_cursor(cls, cursor, query, session):
polled = cursor.poll()

@classmethod
def get_columns(cls, inspector: Inspector, table_name: str, schema: str) -> list:
def get_columns(
cls, inspector: Inspector, table_name: str, schema: str) -> List[dict]:
return inspector.get_columns(table_name, schema)

@classmethod
Expand Down Expand Up @@ -1538,6 +1542,14 @@ def _partition_query(
cls, table_name, limit=0, order_by=None, filters=None):
return f'SHOW PARTITIONS {table_name}'

@classmethod
def select_star(cls, my_db, table_name: str, engine: Engine, schema: str = None,
limit: int = 100, show_cols: bool = False, indent: bool = True,
latest_partition: bool = True, cols: List[dict] = []) -> str:
return BaseEngineSpec.select_star(
my_db, table_name, engine, schema, limit,
show_cols, indent, latest_partition, cols)

@classmethod
def modify_url_for_impersonation(cls, url, impersonate_user, username):
"""
Expand Down
16 changes: 16 additions & 0 deletions superset/models/sql_types/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
33 changes: 25 additions & 8 deletions superset/models/sql_types/presto_sql_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,42 +26,59 @@ class TinyInteger(Integer):
"""
A type for tiny ``int`` integers.
"""
def _compiler_dispatch(self, visitor, **kw):
def python_type(self):
return int

@classmethod
def _compiler_dispatch(cls, _visitor, **_kw):
return 'TINYINT'


class Interval(TypeEngine):
"""
A type for intervals.
"""
def _compiler_dispatch(self, visitor, **kw):
def python_type(self):
return None

@classmethod
def _compiler_dispatch(cls, _visitor, **_kw):
return 'INTERVAL'


class Array(TypeEngine):

"""
A type for arrays.
"""
def _compiler_dispatch(self, visitor, **kw):
def python_type(self):
return list

@classmethod
def _compiler_dispatch(cls, _visitor, **_kw):
return 'ARRAY'


class Map(TypeEngine):

"""
A type for maps.
"""
def _compiler_dispatch(self, visitor, **kw):
def python_type(self):
return dict

@classmethod
def _compiler_dispatch(cls, _visitor, **_kw):
return 'MAP'


class Row(TypeEngine):

"""
A type for rows.
"""
def _compiler_dispatch(self, visitor, **kw):
def python_type(self):
return None

@classmethod
def _compiler_dispatch(cls, _visitor, **_kw):
return 'ROW'


Expand Down