From 5e7967752200524de99b12eead11a5243a68ff9e Mon Sep 17 00:00:00 2001 From: Maxime Beauchemin Date: Fri, 30 Mar 2018 22:45:47 +0000 Subject: [PATCH 1/3] [bugfix] convert metrics to numeric in dataframe It appears that sometimes the dbapi driver and pandas' read_sql fail to return the proper numeric types for metrics, so they show up as `object` in the dataframe. This results in "No numeric types to aggregate" errors when trying to perform aggregations or pivoting in pandas. This PR looks for metrics in dataframes that are typed as "object" and uses pandas' to_numeric to convert them. --- superset/viz.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/superset/viz.py b/superset/viz.py index 5d98d5e60e55b..f8ffe88162dd8 100644 --- a/superset/viz.py +++ b/superset/viz.py @@ -170,6 +170,13 @@ def get_df(self, query_obj=None): if self.datasource.offset: df[DTTM_ALIAS] += timedelta(hours=self.datasource.offset) df[DTTM_ALIAS] += self.time_shift + + # Converting metrics to numeric when pandas.read_sql cannot + metrics = query_obj.get('metrics') or [] + for col, dtype in df.dtypes.iteritems(): + if dtype.type == np.object_ and col in metrics: + df[col] = pd.to_numeric(df[col]) + df.replace([np.inf, -np.inf], np.nan) fillna = self.get_fillna_for_columns(df.columns) df = df.fillna(fillna) @@ -1060,7 +1067,6 @@ def process_data(self, df, aggregate=False): df = df.fillna(0) if fd.get('granularity') == 'all': raise Exception(_('Pick a time granularity for your time series')) - if not aggregate: df = df.pivot_table( index=DTTM_ALIAS, From f6471a86354541f91c0e64ac7d206499fb78743e Mon Sep 17 00:00:00 2001 From: Maxime Beauchemin Date: Fri, 30 Mar 2018 23:26:14 +0000 Subject: [PATCH 2/3] Fix tests --- superset/viz.py | 13 ++++++++----- tests/viz_tests.py | 2 ++ 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/superset/viz.py b/superset/viz.py index f8ffe88162dd8..adb2e901a9a54 100644 --- a/superset/viz.py +++ b/superset/viz.py @@ -171,17 +171,20 @@ 
def get_df(self, query_obj=None): df[DTTM_ALIAS] += timedelta(hours=self.datasource.offset) df[DTTM_ALIAS] += self.time_shift - # Converting metrics to numeric when pandas.read_sql cannot - metrics = query_obj.get('metrics') or [] - for col, dtype in df.dtypes.iteritems(): - if dtype.type == np.object_ and col in metrics: - df[col] = pd.to_numeric(df[col]) + self.df_metrics_to_num(df, query_obj.get('metrics') or []) df.replace([np.inf, -np.inf], np.nan) fillna = self.get_fillna_for_columns(df.columns) df = df.fillna(fillna) return df + @staticmethod + def df_metrics_to_num(df, metrics): + """Converting metrics to numeric when pandas.read_sql cannot""" + for col, dtype in df.dtypes.iteritems(): + if dtype.type == np.object_ and col in metrics: + df[col] = pd.to_numeric(df[col]) + def query_obj(self): """Building a query object""" form_data = self.form_data diff --git a/tests/viz_tests.py b/tests/viz_tests.py index 6822837e28312..a5adfc1f2540f 100644 --- a/tests/viz_tests.py +++ b/tests/viz_tests.py @@ -77,6 +77,8 @@ def test_get_df_handles_dttm_col(self): results.df.empty = False datasource.query = Mock(return_value=results) test_viz = viz.BaseViz(datasource, form_data) + + test_viz.df_metrics_to_num = Mock() test_viz.get_fillna_for_columns = Mock(return_value=0) test_viz.get_df(query_obj) mock_call = df.__setitem__.mock_calls[0] From 32a03cb5b847b14cf660b7325f9f6fa1a5a5f761 Mon Sep 17 00:00:00 2001 From: Maxime Beauchemin Date: Mon, 2 Apr 2018 21:30:36 +0000 Subject: [PATCH 3/3] Remove all iteritems --- superset/models/core.py | 2 +- superset/viz.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/superset/models/core.py b/superset/models/core.py index c32ac5c229635..6eef48c688ccc 100644 --- a/superset/models/core.py +++ b/superset/models/core.py @@ -702,7 +702,7 @@ def needs_conversion(df_series): return True return False - for k, v in df.dtypes.iteritems(): + for k, v in df.dtypes.items(): if v.type == numpy.object_ and 
needs_conversion(df[k]): df[k] = df[k].apply(utils.json_dumps_w_dates) return df diff --git a/superset/viz.py b/superset/viz.py index adb2e901a9a54..2cab0f18d0121 100644 --- a/superset/viz.py +++ b/superset/viz.py @@ -181,7 +181,7 @@ def get_df(self, query_obj=None): @staticmethod def df_metrics_to_num(df, metrics): """Converting metrics to numeric when pandas.read_sql cannot""" - for col, dtype in df.dtypes.iteritems(): + for col, dtype in df.dtypes.items(): if dtype.type == np.object_ and col in metrics: df[col] = pd.to_numeric(df[col]) @@ -1393,7 +1393,7 @@ def get_data(self, df): pt = (pt / pt.sum()).T pt = pt.reindex(row.index) chart_data = [] - for name, ys in pt.iteritems(): + for name, ys in pt.items(): if pt[name].dtype.kind not in 'biufc' or name in self.groupby: continue if isinstance(name, string_types): @@ -1404,7 +1404,7 @@ def get_data(self, df): l = [str(s) for s in name[1:]] # noqa: E741 series_title = ', '.join(l) values = [] - for i, v in ys.iteritems(): + for i, v in ys.items(): x = i if isinstance(x, (tuple, list)): x = ', '.join([text_type(s) for s in x])