From 6d8dd5f29ef03eaf7427e44362644e8746357e3f Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Fri, 8 Sep 2023 14:05:28 +0200 Subject: [PATCH 1/2] fix(python, rust): enforce sortedness of by argument in rolling_* functions --- crates/polars-plan/src/dsl/mod.rs | 3 +++ .../unit/operations/rolling/test_rolling.py | 18 ++++++++++++++++-- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/crates/polars-plan/src/dsl/mod.rs b/crates/polars-plan/src/dsl/mod.rs index 6267a8e42354..fca087abf8b1 100644 --- a/crates/polars-plan/src/dsl/mod.rs +++ b/crates/polars-plan/src/dsl/mod.rs @@ -1,5 +1,7 @@ #![allow(ambiguous_glob_reexports)] //! Domain specific language for the Lazy API. +#[cfg(feature = "rolling_window")] +use polars_core::utils::ensure_sorted_arg; #[cfg(feature = "dtype-categorical")] pub mod cat; #[cfg(feature = "dtype-categorical")] @@ -1240,6 +1242,7 @@ impl Expr { ), _ => (by.clone(), &None), }; + ensure_sorted_arg(&by, expr_name)?; let by = by.datetime().unwrap(); let by_values = by.cont_slice().map_err(|_| { polars_err!( diff --git a/py-polars/tests/unit/operations/rolling/test_rolling.py b/py-polars/tests/unit/operations/rolling/test_rolling.py index e9b6e616f5ca..17d8bfd0f783 100644 --- a/py-polars/tests/unit/operations/rolling/test_rolling.py +++ b/py-polars/tests/unit/operations/rolling/test_rolling.py @@ -48,7 +48,7 @@ def example_df() -> pl.DataFrame: def test_rolling_kernels_and_group_by_rolling( example_df: pl.DataFrame, period: str | timedelta, closed: ClosedInterval ) -> None: - out1 = example_df.select( + out1 = example_df.set_sorted("dt").select( [ pl.col("dt"), # this differs from group_by aggregation because the empty window is @@ -789,7 +789,7 @@ def test_rolling_empty_window_9406() -> None: "d", [datetime(2019, 1, x) for x in [16, 17, 18, 22, 23]], dtype=pl.Datetime(time_unit="us", time_zone=None), - ) + ).set_sorted() rawdata = pl.Series("x", [1.1, 1.2, 1.3, 1.15, 1.25], dtype=pl.Float64) rmin = pl.Series("x", [None, 1.1, 1.1, None, 1.15], dtype=pl.Float64) rmax = pl.Series("x", [None, 1.1, 1.2, None, 1.15], dtype=pl.Float64) @@ -834,6 +834,20 @@ def test_rolling_weighted_quantile_10031() -> None: ) +def test_rolling_aggregations_unsorted_raise_10991() -> None: + df = pl.DataFrame( + { + "dt": [datetime(2020, 1, 3), datetime(2020, 1, 1), datetime(2020, 1, 2)], + "val": [1, 2, 3], + } + ) + with pytest.raises( + pl.InvalidOperationError, + match="argument in operation 'rolling_sum' is not explicitly sorted", + ): + df.with_columns(roll=pl.col("val").rolling_sum("2d", by="dt", closed="right")) + + def test_rolling() -> None: a = pl.Series("a", [1, 2, 3, 2, 1]) assert_series_equal(a.rolling_min(2), pl.Series("a", [None, 1, 2, 2, 1])) From 306418b5488ad256f6c8c86d135cbab588dda0b0 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Tue, 12 Sep 2023 20:40:45 +0200 Subject: [PATCH 2/2] post-merge fixup --- py-polars/tests/unit/operations/rolling/test_rolling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/py-polars/tests/unit/operations/rolling/test_rolling.py b/py-polars/tests/unit/operations/rolling/test_rolling.py index 37a61493c6b0..27f57e660a7c 100644 --- a/py-polars/tests/unit/operations/rolling/test_rolling.py +++ b/py-polars/tests/unit/operations/rolling/test_rolling.py @@ -928,7 +928,7 @@ def test_rolling_nanoseconds_11003() -> None: "val": [1, 2, 3], } ) - df = df.with_columns(pl.col("dt").str.to_datetime(time_unit="ns")) + df = df.with_columns(pl.col("dt").str.to_datetime(time_unit="ns")).set_sorted("dt") result = df.with_columns( pl.col("val").rolling_sum("500ns", by="dt", closed="right") )