Skip to content

Commit

Permalink
Incorporate feedback from initial PR (prematurely merged to lyft-rele…
Browse files Browse the repository at this point in the history
…ase-sp8) (apache#7415)

* add stronger type hints where possible

* fix: lint issues and add select_star func in Hive

* add missing pkg init

* fix: build issues

* fix: pylint issues

* fix: use logging instead of print
  • Loading branch information
DiggidyDave authored and xtinec committed May 1, 2019
1 parent bfd685e commit 51c7f92
Show file tree
Hide file tree
Showing 3 changed files with 67 additions and 22 deletions.
40 changes: 26 additions & 14 deletions superset/db_engine_specs.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
import re
import textwrap
import time
from typing import List, Tuple

from flask import g
from flask_babel import lazy_gettext as _
Expand Down Expand Up @@ -840,13 +841,13 @@ def _create_column_info(cls, column: RowProxy, name: str, data_type: str) -> dic
}

@classmethod
def _get_full_name(cls, names: list) -> str:
def _get_full_name(cls, names: List[Tuple[str, str]]) -> str:
"""
Get the full column name
:param names: list of all individual column names
:return: full column name
"""
return '.'.join(row_type[0] for row_type in names if row_type[0] is not None)
return '.'.join(column[0] for column in names if column[0])

@classmethod
def _has_nested_data_types(cls, component_type: str) -> bool:
Expand All @@ -862,20 +863,20 @@ def _has_nested_data_types(cls, component_type: str) -> bool:
or re.search(white_space_regex, component_type) is not None

@classmethod
def _split_data_type(cls, data_type: str, delimiter: str) -> list:
def _split_data_type(cls, data_type: str, delimiter: str) -> List[str]:
"""
Split data type based on given delimiter. Do not split the string if the
delimiter is enclosed in quotes
:param data_type: data type
:param delimiter: string separator (i.e. open parenthesis, closed parenthesis,
comma, whitespace)
:return:list of strings after breaking it by the delimiter
:return: list of strings after breaking it by the delimiter
"""
return re.split(
r'{}(?=(?:[^\"]*\"[^\"]*\")*[^\"]*$)'.format(delimiter), data_type)

@classmethod
def _parse_structural_column(cls, column: RowProxy, result: list) -> None:
def _parse_structural_column(cls, column: RowProxy, result: List[dict]) -> None:
"""
Parse a row or array column
:param column: column
Expand All @@ -885,7 +886,7 @@ def _parse_structural_column(cls, column: RowProxy, result: list) -> None:
# split on open parenthesis ( to get the structural
# data type and its component types
data_types = cls._split_data_type(full_data_type, r'\(')
stack: list = []
stack: List[Tuple[str, str]] = []
for data_type in data_types:
# split on closed parenthesis ) to track which component
# types belong to what structural data type
Expand Down Expand Up @@ -926,15 +927,16 @@ def _parse_structural_column(cls, column: RowProxy, result: list) -> None:
# We have an array of row objects (i.e. array(row(...)))
elif 'array' == inner_type or 'row' == inner_type:
# Push a dummy object to represent the structural data type
stack.append((None, inner_type))
stack.append(('', inner_type))
# We have an array of a basic data types(i.e. array(varchar)).
elif len(stack) > 0:
# Because it is an array of a basic data type. We have finished
# parsing the structural data type and can move on.
stack.pop()

@classmethod
def _show_columns(cls, inspector: Inspector, table_name: str, schema: str) -> list:
def _show_columns(
cls, inspector: Inspector, table_name: str, schema: str) -> List[RowProxy]:
"""
Show presto column names
:param inspector: object that performs database schema inspection
Expand All @@ -950,7 +952,8 @@ def _show_columns(cls, inspector: Inspector, table_name: str, schema: str) -> li
return columns

@classmethod
def get_columns(cls, inspector: Inspector, table_name: str, schema: str) -> list:
def get_columns(
cls, inspector: Inspector, table_name: str, schema: str) -> List[dict]:
"""
Get columns from a Presto data source. This includes handling row and
array data types
Expand All @@ -961,7 +964,7 @@ def get_columns(cls, inspector: Inspector, table_name: str, schema: str) -> list
(i.e. column name and data type)
"""
columns = cls._show_columns(inspector, table_name, schema)
result: list = []
result: List[dict] = []
for column in columns:
try:
# parse column if it is a row or array
Expand All @@ -971,7 +974,7 @@ def get_columns(cls, inspector: Inspector, table_name: str, schema: str) -> list
else: # otherwise column is a basic data type
column_type = presto_type_map[column.Type]()
except KeyError:
print('Did not recognize type {} of column {}'.format(
logging.info('Did not recognize type {} of column {}'.format(
column.Type, column.Column))
column_type = types.NullType
result.append(cls._create_column_info(column, column.Column, column_type))
Expand All @@ -980,7 +983,7 @@ def get_columns(cls, inspector: Inspector, table_name: str, schema: str) -> list
@classmethod
def select_star(cls, my_db, table_name: str, engine: Engine, schema: str = None,
limit: int = 100, show_cols: bool = False, indent: bool = True,
latest_partition: bool = True, cols: list = []) -> str:
latest_partition: bool = True, cols: List[dict] = []) -> str:
"""
Temporary method until we have a function that can handle row and array columns
"""
Expand All @@ -989,7 +992,7 @@ def select_star(cls, my_db, table_name: str, engine: Engine, schema: str = None,
dot_regex = r'\.(?=(?:[^\"]*\"[^\"]*\")*[^\"]*$)'
presto_cols = [
col for col in presto_cols if re.search(dot_regex, col['name']) is None]
return super(PrestoEngineSpec, cls).select_star(
return BaseEngineSpec.select_star(
my_db, table_name, engine, schema, limit,
show_cols, indent, latest_partition, presto_cols,
)
Expand Down Expand Up @@ -1504,7 +1507,8 @@ def handle_cursor(cls, cursor, query, session):
polled = cursor.poll()

@classmethod
def get_columns(cls, inspector: Inspector, table_name: str, schema: str) -> list:
def get_columns(
cls, inspector: Inspector, table_name: str, schema: str) -> List[dict]:
return inspector.get_columns(table_name, schema)

@classmethod
Expand Down Expand Up @@ -1538,6 +1542,14 @@ def _partition_query(
cls, table_name, limit=0, order_by=None, filters=None):
return f'SHOW PARTITIONS {table_name}'

@classmethod
def select_star(cls, my_db, table_name: str, engine: Engine, schema: str = None,
limit: int = 100, show_cols: bool = False, indent: bool = True,
latest_partition: bool = True, cols: List[dict] = []) -> str:
return BaseEngineSpec.select_star(
my_db, table_name, engine, schema, limit,
show_cols, indent, latest_partition, cols)

@classmethod
def modify_url_for_impersonation(cls, url, impersonate_user, username):
"""
Expand Down
16 changes: 16 additions & 0 deletions superset/models/sql_types/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
33 changes: 25 additions & 8 deletions superset/models/sql_types/presto_sql_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,42 +26,59 @@ class TinyInteger(Integer):
"""
A type for tiny ``int`` integers.
"""
def _compiler_dispatch(self, visitor, **kw):
def python_type(self):
return int

@classmethod
def _compiler_dispatch(cls, _visitor, **_kw):
return 'TINYINT'


class Interval(TypeEngine):
"""
A type for intervals.
"""
def _compiler_dispatch(self, visitor, **kw):
def python_type(self):
return None

@classmethod
def _compiler_dispatch(cls, _visitor, **_kw):
return 'INTERVAL'


class Array(TypeEngine):

"""
A type for arrays.
"""
def _compiler_dispatch(self, visitor, **kw):
def python_type(self):
return list

@classmethod
def _compiler_dispatch(cls, _visitor, **_kw):
return 'ARRAY'


class Map(TypeEngine):

"""
A type for maps.
"""
def _compiler_dispatch(self, visitor, **kw):
def python_type(self):
return dict

@classmethod
def _compiler_dispatch(cls, _visitor, **_kw):
return 'MAP'


class Row(TypeEngine):

"""
A type for rows.
"""
def _compiler_dispatch(self, visitor, **kw):
def python_type(self):
return None

@classmethod
def _compiler_dispatch(cls, _visitor, **_kw):
return 'ROW'


Expand Down

0 comments on commit 51c7f92

Please sign in to comment.