Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move repartition_file_scans out of enable_round_robin check in EnforceDistribution rule #8731

Merged
merged 12 commits into from
Jan 5, 2024
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
239 changes: 80 additions & 159 deletions datafusion/core/src/physical_optimizer/enforce_distribution.rs

Large diffs are not rendered by default.

12 changes: 5 additions & 7 deletions datafusion/core/tests/sql/explain_analyze.rs
Original file line number Diff line number Diff line change
Expand Up @@ -660,14 +660,12 @@ async fn test_physical_plan_display_indent_multi_children() {
" CoalesceBatchesExec: target_batch_size=4096",
" HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c1@0, c2@0)]",
" CoalesceBatchesExec: target_batch_size=4096",
" RepartitionExec: partitioning=Hash([c1@0], 9000), input_partitions=9000",
" RepartitionExec: partitioning=RoundRobinBatch(9000), input_partitions=1",
" CsvExec: file_groups={1 group: [[ARROW_TEST_DATA/csv/aggregate_test_100.csv]]}, projection=[c1], has_header=true",
" RepartitionExec: partitioning=Hash([c1@0], 9000), input_partitions=1",
" CsvExec: file_groups={1 group: [[ARROW_TEST_DATA/csv/aggregate_test_100.csv]]}, projection=[c1], has_header=true",
" CoalesceBatchesExec: target_batch_size=4096",
" RepartitionExec: partitioning=Hash([c2@0], 9000), input_partitions=9000",
" RepartitionExec: partitioning=RoundRobinBatch(9000), input_partitions=1",
" ProjectionExec: expr=[c1@0 as c2]",
" CsvExec: file_groups={1 group: [[ARROW_TEST_DATA/csv/aggregate_test_100.csv]]}, projection=[c1], has_header=true",
" RepartitionExec: partitioning=Hash([c2@0], 9000), input_partitions=1",
" ProjectionExec: expr=[c1@0 as c2]",
" CsvExec: file_groups={1 group: [[ARROW_TEST_DATA/csv/aggregate_test_100.csv]]}, projection=[c1], has_header=true",
];

let normalizer = ExplainNormalizer::new();
Expand Down
2 changes: 1 addition & 1 deletion datafusion/sqllogictest/test_files/arrow_typeof.slt
Original file line number Diff line number Diff line change
Expand Up @@ -375,4 +375,4 @@ select arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)');
query T
select arrow_typeof(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'));
----
LargeList(Field { name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} })
LargeList(Field { name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} })
4 changes: 2 additions & 2 deletions datafusion/sqllogictest/test_files/repartition_scan.slt
Original file line number Diff line number Diff line change
Expand Up @@ -149,11 +149,11 @@ LOCATION 'test_files/scratch/repartition_scan/csv_table/';
query I
select * from csv_table;
----
5
1
2
3
4
5

## Expect to see the scan read the file as "4" groups with even sizes (offsets)
query TT
Expand Down Expand Up @@ -192,11 +192,11 @@ LOCATION 'test_files/scratch/repartition_scan/json_table/';
query I
select * from "json_table";
----
5
1
2
3
4
5

## Expect to see the scan read the file as "4" groups with even sizes (offsets)
query TT
Expand Down
4 changes: 2 additions & 2 deletions datafusion/sqllogictest/test_files/timestamps.slt
Original file line number Diff line number Diff line change
Expand Up @@ -1862,7 +1862,7 @@ SELECT to_timestamp(null) is null as c1,
----
true true true true true true true true true true true true true

# verify timestamp output types
# verify timestamp output types
query TTT
SELECT arrow_typeof(to_timestamp(1)), arrow_typeof(to_timestamp(null)), arrow_typeof(to_timestamp('2023-01-10 12:34:56.000'))
----
Expand All @@ -1880,7 +1880,7 @@ SELECT arrow_typeof(to_timestamp(1)) = arrow_typeof(1::timestamp) as c1,
true true true true true true

# known issues. currently overflows (expects default precision to be microsecond instead of nanoseconds. Work pending)
#verify extreme values
#verify extreme values
#query PPPPPPPP
#SELECT to_timestamp(-62125747200), to_timestamp(1926632005177), -62125747200::timestamp, 1926632005177::timestamp, cast(-62125747200 as timestamp), cast(1926632005177 as timestamp)
#----
Expand Down
2 changes: 1 addition & 1 deletion datafusion/sqllogictest/test_files/tpch/q2.slt.part
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,7 @@ order by
p_partkey
limit 10;
----
9828.21 Supplier#000000647 UNITED KINGDOM 13120 Manufacturer#5 x5U7MBZmwfG9 33-258-202-4782 s the slyly even ideas poach fluffily
9828.21 Supplier#000000647 UNITED KINGDOM 13120 Manufacturer#5 x5U7MBZmwfG9 33-258-202-4782 s the slyly even ideas poach fluffily
9508.37 Supplier#000000070 FRANCE 3563 Manufacturer#1 INWNH2w,OOWgNDq0BRCcBwOMQc6PdFDc4 16-821-608-1166 ests sleep quickly express ideas. ironic ideas haggle about the final T
9508.37 Supplier#000000070 FRANCE 17268 Manufacturer#4 INWNH2w,OOWgNDq0BRCcBwOMQc6PdFDc4 16-821-608-1166 ests sleep quickly express ideas. ironic ideas haggle about the final T
9453.01 Supplier#000000802 ROMANIA 10021 Manufacturer#5 ,6HYXb4uaHITmtMBj4Ak57Pd 29-342-882-6463 gular frets. permanently special multipliers believe blithely alongs
Expand Down
4 changes: 2 additions & 2 deletions datafusion/sqllogictest/test_files/window.slt
Original file line number Diff line number Diff line change
Expand Up @@ -3794,7 +3794,7 @@ select a,
1 1
2 1

# support scalar value in ORDER BY
# support scalar value in ORDER BY
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I simply ran all the sqllogictests locally. It seems some stray space characters had been left there.

query I
select rank() over (order by 1) rnk from (select 1 a union all select 2 a) x
----
Expand Down Expand Up @@ -3832,4 +3832,4 @@ select row_number() over (partition by 1 order by 1) rn,
from (select 1 a union all select 2 a) x;
----
1 1 1 1 1 1
2 1 1 2 2 1
2 1 1 2 2 1
Loading