Skip to content

Commit

Permalink
Refactor to support recursive unnest in physical plan (#11577)
Browse files Browse the repository at this point in the history
* chore: poc

* fix unnest struct

* UT for memoization

* remove unnecessary projection

* chore: temp test case

* multi depth unnest supported

* chore: add map of original column and transformed col

* transformation map to physical layer

* prototype for recursive array length

* chore: some compile err

* finalize input type in physical layer

* chore: refactor unnest builder

* add inferred unnesting type

* fix compile err

* fail test in builder

* Compile err

* chore: detect some bugs

* some work

* support recursive unnest in physical layer

* UT for new build batch function

* compile err

* fix unnesting into empty arrays

* some comment

* fix unnest struct

* some note

* chore: fix all test failure

* fix projection pushdown

* custom rewriter for recursive unnest

* simplify

* rm unnecessary projection

* chore: better comments

* more comments

* chore: better comments

* remove breaking api

* rename

* more unit test

* remove debug

* clean up

* fix proto

* fix dataframe

* fix clippy

* cargo fmt

* fix some test

* fix all test

* fix unnest in join

* fix doc and tests

* chore: better doc

* better doc

* tune comment

* rm todo

* refactor

* chore: reserve test

* add a basic test

* chore: more document

* doc on ColumnUnnestType List

* chore: add `PartialOrd` to new types
  • Loading branch information
duongcongtoai authored Sep 25, 2024
1 parent 61e6db3 commit b35e720
Show file tree
Hide file tree
Showing 22 changed files with 2,142 additions and 294 deletions.
10 changes: 9 additions & 1 deletion datafusion/core/src/physical_planner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ use datafusion_physical_expr::aggregate::{AggregateExprBuilder, AggregateFunctio
use datafusion_physical_expr::expressions::Literal;
use datafusion_physical_expr::LexOrdering;
use datafusion_physical_plan::placeholder_row::PlaceholderRowExec;
use datafusion_physical_plan::unnest::ListUnnest;
use datafusion_sql::utils::window_expr_common_partition_keys;

use async_trait::async_trait;
Expand Down Expand Up @@ -848,9 +849,16 @@ impl DefaultPhysicalPlanner {
}) => {
let input = children.one()?;
let schema = SchemaRef::new(schema.as_ref().to_owned().into());
let list_column_indices = list_type_columns
.iter()
.map(|(index, unnesting)| ListUnnest {
index_in_input_schema: *index,
depth: unnesting.depth,
})
.collect();
Arc::new(UnnestExec::new(
input,
list_type_columns.clone(),
list_column_indices,
struct_type_columns.clone(),
schema,
options.clone(),
Expand Down
2 changes: 1 addition & 1 deletion datafusion/core/tests/dataframe/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1391,7 +1391,7 @@ async fn unnest_with_redundant_columns() -> Result<()> {
let optimized_plan = df.clone().into_optimized_plan()?;
let expected = vec![
"Projection: shapes.shape_id [shape_id:UInt32]",
" Unnest: lists[shape_id2] structs[] [shape_id:UInt32, shape_id2:UInt32;N]",
" Unnest: lists[shape_id2|depth=1] structs[] [shape_id:UInt32, shape_id2:UInt32;N]",
" Aggregate: groupBy=[[shapes.shape_id]], aggr=[[array_agg(shapes.shape_id) AS shape_id2]] [shape_id:UInt32, shape_id2:List(Field { name: \"item\", data_type: UInt32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} });N]",
" TableScan: shapes projection=[shape_id] [shape_id:UInt32]",
];
Expand Down
3 changes: 1 addition & 2 deletions datafusion/expr/src/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2346,8 +2346,7 @@ impl fmt::Display for Expr {
},
Expr::Placeholder(Placeholder { id, .. }) => write!(f, "{id}"),
Expr::Unnest(Unnest { expr }) => {
// TODO: use Display instead of Debug, there is non-unique expression name in projection issue.
write!(f, "UNNEST({expr:?})")
write!(f, "UNNEST({expr})")
}
}
}
Expand Down
Loading

0 comments on commit b35e720

Please sign in to comment.