diff --git a/superset/result_set.py b/superset/result_set.py index 82b0a313935e3..008699d683447 100644 --- a/superset/result_set.py +++ b/superset/result_set.py @@ -174,7 +174,10 @@ def convert_pa_dtype(pa_dtype: pa.DataType) -> Optional[str]: @staticmethod def convert_table_to_df(table: pa.Table) -> pd.DataFrame: - return table.to_pandas(integer_object_nulls=True) + try: + return table.to_pandas(integer_object_nulls=True) + except pa.lib.ArrowInvalid: + return table.to_pandas(integer_object_nulls=True, timestamp_as_object=True) @staticmethod def first_nonempty(items: List[Any]) -> Any: diff --git a/superset/utils/core.py b/superset/utils/core.py index aafe18a1e1208..4177e018414af 100644 --- a/superset/utils/core.py +++ b/superset/utils/core.py @@ -1762,14 +1762,14 @@ def normalize_dttm_col( # Column is formatted as a numeric value unit = timestamp_format.replace("epoch_", "") df[DTTM_ALIAS] = pd.to_datetime( - dttm_col, utc=False, unit=unit, origin="unix" + dttm_col, utc=False, unit=unit, origin="unix", errors="coerce" ) else: # Column has already been formatted as a timestamp. df[DTTM_ALIAS] = dttm_col.apply(pd.Timestamp) else: df[DTTM_ALIAS] = pd.to_datetime( - df[DTTM_ALIAS], utc=False, format=timestamp_format + df[DTTM_ALIAS], utc=False, format=timestamp_format, errors="coerce" ) if offset: df[DTTM_ALIAS] += timedelta(hours=offset) diff --git a/tests/integration_tests/utils_tests.py b/tests/integration_tests/utils_tests.py index 012bb96ca97b3..0e145eab7fbf5 100644 --- a/tests/integration_tests/utils_tests.py +++ b/tests/integration_tests/utils_tests.py @@ -1201,3 +1201,8 @@ def normalize_col( # test numeric epoch_ms format df = pd.DataFrame([{"__timestamp": ts.timestamp() * 1000, "a": 1}]) assert normalize_col(df, "epoch_ms", 0, None)[DTTM_ALIAS][0] == ts + + # test that out of bounds timestamps are coerced to None instead of + # erroring out + df = pd.DataFrame([{"__timestamp": "1677-09-21 00:00:00", "a": 1}]) + assert pd.isnull(normalize_col(df, None, 0, None)[DTTM_ALIAS][0]) diff --git a/tests/unit_tests/dataframe_test.py b/tests/unit_tests/dataframe_test.py index 79625cffe63de..8785ee1d7b3ec 100644 --- a/tests/unit_tests/dataframe_test.py +++ b/tests/unit_tests/dataframe_test.py @@ -15,6 +15,11 @@ # specific language governing permissions and limitations # under the License. # pylint: disable=unused-argument, import-outside-toplevel +from datetime import datetime + +import pytest +from pandas import Timestamp + from superset.dataframe import df_to_records from superset.superset_typing import DbapiDescription @@ -53,3 +58,50 @@ def test_js_max_int(app_context: None) -> None: {"a": 1, "b": "1239162456494753670", "c": "c1"}, {"a": 2, "b": 100, "c": "c2"}, ] + + +@pytest.mark.parametrize( + "input_, expected", + [ + pytest.param( + [ + (datetime.strptime("1677-09-22 00:12:43", "%Y-%m-%d %H:%M:%S"), 1), + (datetime.strptime("2262-04-11 23:47:17", "%Y-%m-%d %H:%M:%S"), 2), + ], + [ + { + "a": datetime.strptime("1677-09-22 00:12:43", "%Y-%m-%d %H:%M:%S"), + "b": 1, + }, + { + "a": datetime.strptime("2262-04-11 23:47:17", "%Y-%m-%d %H:%M:%S"), + "b": 2, + }, + ], + id="timestamp conversion fail", + ), + pytest.param( + [ + (datetime.strptime("1677-09-22 00:12:44", "%Y-%m-%d %H:%M:%S"), 1), + (datetime.strptime("2262-04-11 23:47:16", "%Y-%m-%d %H:%M:%S"), 2), + ], + [ + {"a": Timestamp("1677-09-22 00:12:44"), "b": 1}, + {"a": Timestamp("2262-04-11 23:47:16"), "b": 2}, + ], + id="timestamp conversion success", + ), + ], +) +def test_max_pandas_timestamp(input_, expected) -> None: + from superset.db_engine_specs import BaseEngineSpec + from superset.result_set import SupersetResultSet + + cursor_descr: DbapiDescription = [ + ("a", "datetime", None, None, None, None, False), + ("b", "int", None, None, None, None, False), + ] + results = SupersetResultSet(input_, cursor_descr, BaseEngineSpec) + df = results.to_pandas_df() + + assert df_to_records(df) == expected