apache · gruuya · Feb 1, 2024 · Feb 1, 2024 · Feb 1, 2024 · Feb 1, 2024
diff --git a/datafusion/physical-plan/src/aggregates/mod.rs b/datafusion/physical-plan/src/aggregates/mod.rs
@@ -603,9 +603,7 @@ impl ExecutionPlan for AggregateExec {
             // First stage aggregation will not change the output partitioning,
             // but needs to respect aliases (e.g. mapping in the GROUP BY
             // expression).
             let input_eq_properties = self.input.equivalence_properties();
-            // First stage Aggregation will not change the output partitioning but need to respect the Alias
-            let input_partition = self.input.output_partitioning();
             if let Partitioning::Hash(exprs, part) = input_partition {
                 let normalized_exprs = exprs
                     .into_iter()

diff --git a/datafusion/physical-plan/src/projection.rs b/datafusion/physical-plan/src/projection.rs
@@ -56,6 +56,8 @@ pub struct ProjectionExec {
     input: Arc<dyn ExecutionPlan>,
     /// The output ordering
     output_ordering: Option<Vec<PhysicalSortExpr>>,
+    /// Equivalence properties of the output, computed once at construction and cached
+    equivalence_properties: EquivalenceProperties,
     /// The mapping used to normalize expressions like Partitioning and
     /// PhysicalSortExpr that maps input to output
     projection_mapping: ProjectionMapping,
@@ -96,14 +98,16 @@ impl ProjectionExec {
         let projection_mapping = ProjectionMapping::try_new(&expr, &input_schema)?;
 
         let input_eqs = input.equivalence_properties();
-        let project_eqs = input_eqs.project(&projection_mapping, schema.clone());
-        let output_ordering = project_eqs.oeq_class().output_ordering();
+        let equivalence_properties =
+            input_eqs.project(&projection_mapping, schema.clone());
+        let output_ordering = equivalence_properties.oeq_class().output_ordering();
 
         Ok(Self {
             expr,
             schema,
             input,
             output_ordering,
+            equivalence_properties,
             projection_mapping,
             metrics: ExecutionPlanMetricsSet::new(),
         })
@@ -173,7 +177,9 @@ impl ExecutionPlan for ProjectionExec {
     fn output_partitioning(&self) -> Partitioning {
         // Output partition need to respect the alias
         let input_partition = self.input.output_partitioning();
+        // `project_expr` rewrites input-schema expressions into the output schema,
+        // so it must consult the *input's* equivalence classes, not the projected ones.
         let input_eq_properties = self.input.equivalence_properties();
         if let Partitioning::Hash(exprs, part) = input_partition {
             let normalized_exprs = exprs
                 .into_iter()
@@ -201,9 +207,7 @@ impl ExecutionPlan for ProjectionExec {
     }
 
     fn equivalence_properties(&self) -> EquivalenceProperties {
-        self.input
-            .equivalence_properties()
-            .project(&self.projection_mapping, self.schema())
+        self.equivalence_properties.clone()
     }
 
     fn with_new_children(