[bugfix] convert metrics to numeric in dataframe (#4726)

* [bugfix] convert metrics to numeric in dataframe It appears sometimes the dbapi driver and pandas's read_sql fail at returning the proper numeric types for metrics and they show up as `object` in the dataframe. This results in "No numeric types to aggregate" errors when trying to perform aggregations or pivoting in pandas. This PR looks for metrics in dataframes that are typed as "object" and uses pandas' to_numeric to convert. * Fix tests * Remove all iteritems
apache · Apr 3, 2018 · f6fe11f · f6fe11f
1 parent aa4173d
commit f6fe11f
Show file tree

Hide file tree

Showing 3 changed files with 15 additions and 4 deletions.
diff --git a/superset/models/core.py b/superset/models/core.py
@@ -702,7 +702,7 @@ def needs_conversion(df_series):
                 return True
             return False
 
-        for k, v in df.dtypes.iteritems():
+        for k, v in df.dtypes.items():
             if v.type == numpy.object_ and needs_conversion(df[k]):
                 df[k] = df[k].apply(utils.json_dumps_w_dates)
         return df

diff --git a/superset/viz.py b/superset/viz.py
@@ -170,11 +170,21 @@ def get_df(self, query_obj=None):
                 if self.datasource.offset:
                     df[DTTM_ALIAS] += timedelta(hours=self.datasource.offset)
                 df[DTTM_ALIAS] += self.time_shift
+
+            self.df_metrics_to_num(df, query_obj.get('metrics') or [])
+
             df.replace([np.inf, -np.inf], np.nan)
             fillna = self.get_fillna_for_columns(df.columns)
             df = df.fillna(fillna)
         return df
 
+    @staticmethod
+    def df_metrics_to_num(df, metrics):
+        """Converting metrics to numeric when pandas.read_sql cannot"""
+        for col, dtype in df.dtypes.items():
+            if dtype.type == np.object_ and col in metrics:
+                df[col] = pd.to_numeric(df[col])
+
     def query_obj(self):
         """Building a query object"""
         form_data = self.form_data
@@ -1060,7 +1070,6 @@ def process_data(self, df, aggregate=False):
         df = df.fillna(0)
         if fd.get('granularity') == 'all':
             raise Exception(_('Pick a time granularity for your time series'))
-
         if not aggregate:
             df = df.pivot_table(
                 index=DTTM_ALIAS,
@@ -1384,7 +1393,7 @@ def get_data(self, df):
             pt = (pt / pt.sum()).T
         pt = pt.reindex(row.index)
         chart_data = []
-        for name, ys in pt.iteritems():
+        for name, ys in pt.items():
             if pt[name].dtype.kind not in 'biufc' or name in self.groupby:
                 continue
             if isinstance(name, string_types):
@@ -1395,7 +1404,7 @@ def get_data(self, df):
                 l = [str(s) for s in name[1:]]  # noqa: E741
                 series_title = ', '.join(l)
             values = []
-            for i, v in ys.iteritems():
+            for i, v in ys.items():
                 x = i
                 if isinstance(x, (tuple, list)):
                     x = ', '.join([text_type(s) for s in x])

diff --git a/tests/viz_tests.py b/tests/viz_tests.py
@@ -77,6 +77,8 @@ def test_get_df_handles_dttm_col(self):
         results.df.empty = False
         datasource.query = Mock(return_value=results)
         test_viz = viz.BaseViz(datasource, form_data)
+
+        test_viz.df_metrics_to_num = Mock()
         test_viz.get_fillna_for_columns = Mock(return_value=0)
         test_viz.get_df(query_obj)
         mock_call = df.__setitem__.mock_calls[0]