Skip to content

Commit

Permalink
fix: Only rewrite numeric ineq joins (#19083)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 authored Oct 3, 2024
1 parent af5ac44 commit 310b482
Show file tree
Hide file tree
Showing 3 changed files with 61 additions and 5 deletions.
21 changes: 19 additions & 2 deletions crates/polars-plan/src/plans/conversion/join.rs
Original file line number Diff line number Diff line change
Expand Up @@ -316,8 +316,25 @@ fn resolve_join_where(
)?;

if let Some(ie_op_) = to_inequality_operator(&op) {
// We already have an IEjoin or an Inner join, push to remaining
if ie_op.len() >= 2 || !eq_right_on.is_empty() {
fn is_numeric(e: &Expr, schema: &Schema) -> bool {
expr_to_leaf_column_names_iter(e).any(|name| {
if let Some(dt) = schema.get(name.as_str()) {
dt.to_physical().is_numeric()
} else {
false
}
})
}

// We fallback to remaining if:
// - we already have an IEjoin or Inner join
// - we already have an Inner join
// - data is not numeric (our iejoin doesn't yet implement that)
if ie_op.len() >= 2
|| !eq_right_on.is_empty()
|| !is_numeric(&left, &schema_left)
|| !is_numeric(&right, &schema_right)
{
remaining_preds.push(to_binary_post_join(left, op, right, &schema_right, &suffix))
} else {
ie_left_on.push(left);
Expand Down
26 changes: 23 additions & 3 deletions crates/polars-plan/src/plans/optimizer/collapse_joins.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

use std::sync::Arc;

use polars_core::schema::SchemaRef;
use polars_core::schema::*;
#[cfg(feature = "iejoin")]
use polars_ops::frame::{IEJoinOptions, InequalityOperator};
use polars_ops::frame::{JoinCoalesce, JoinType};
Expand Down Expand Up @@ -304,8 +304,28 @@ pub fn optimize(root: Node, lp_arena: &mut Arena<IR>, expr_arena: &mut Arena<AEx
} else {
#[cfg(feature = "iejoin")]
if let Some(ie_op_) = to_inequality_operator(&op) {
// We already have an IEjoin or an Inner join, push to remaining
if ie_op.len() >= 2 || !eq_left_on.is_empty() {
fn is_numeric(
node: Node,
expr_arena: &Arena<AExpr>,
schema: &Schema,
) -> bool {
aexpr_to_leaf_names_iter(node, expr_arena).any(|name| {
if let Some(dt) = schema.get(name.as_str()) {
dt.to_physical().is_numeric()
} else {
false
}
})
}

// We fallback to remaining if:
// - we already have an IEjoin or Inner join
// - we already have an Inner join
// - data is not numeric (our iejoin doesn't yet implement that)
if ie_op.len() >= 2
|| !eq_left_on.is_empty()
|| !is_numeric(left, expr_arena, left_schema)
{
remaining_predicates.push(node);
} else {
ie_left_on.push(ExprIR::from_node(left, expr_arena));
Expand Down
19 changes: 19 additions & 0 deletions py-polars/tests/unit/operations/test_inequality_join.py
Original file line number Diff line number Diff line change
Expand Up @@ -575,3 +575,22 @@ def test_raise_invalid_predicate() -> None:

with pytest.raises(pl.exceptions.InvalidOperationError):
left.join_where(right, pl.col.index >= pl.col.a).collect()


def test_join_on_strings() -> None:
df = pl.LazyFrame(
{
"a": ["a", "b", "c"],
"b": ["b", "b", "b"],
}
)

q = df.join_where(df, pl.col("a").ge(pl.col("a_right")))

assert "CROSS JOIN" in q.explain()
assert q.collect().to_dict(as_series=False) == {
"a": ["a", "b", "b", "c", "c", "c"],
"b": ["b", "b", "b", "b", "b", "b"],
"a_right": ["a", "a", "b", "a", "b", "c"],
"b_right": ["b", "b", "b", "b", "b", "b"],
}

0 comments on commit 310b482

Please sign in to comment.