apache · zhztheplayer · Jan 19, 2024 · Jan 18, 2024 · Jan 18, 2024 · Jan 18, 2024
diff --git a/.github/workflows/velox_be.yml b/.github/workflows/velox_be.yml
@@ -169,6 +169,16 @@ jobs:
             --local --preset=velox --benchmark-type=h --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \
           && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \
             --local --preset=velox --benchmark-type=ds --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1'
+      - name: TPC-DS SF1.0 Parquet local spark3.3 Q38 flush # only TPC-DS q38 runs here: BHJ off, partial-aggregation confs overridden below
+        run: |
+          $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh 'cd /opt/gluten/tools/gluten-it \
+          && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \
+            --local --preset=velox --benchmark-type=ds --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 --queries=q38 \
+            --disable-bhj \
+            --extra-conf=spark.gluten.sql.columnar.backend.velox.maxPartialAggregationMemoryRatio=0.1 \
+            --extra-conf=spark.gluten.sql.columnar.backend.velox.maxExtendedPartialAggregationMemoryRatio=0.2 \
+            --extra-conf=spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinPct=100 \
+            --extra-conf=spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinRows=0'
       - name: Exit docker container
         if: ${{ always() }}
         run: |

diff --git a/backends-velox/src/main/scala/org/apache/spark/sql/catalyst/FlushableHashAggregateRule.scala b/backends-velox/src/main/scala/org/apache/spark/sql/catalyst/FlushableHashAggregateRule.scala
@@ -16,10 +16,11 @@
  */
 package org.apache.spark.sql.catalyst
 
-import io.glutenproject.execution.{FlushableHashAggregateExecTransformer, ProjectExecTransformer, RegularHashAggregateExecTransformer}
+import io.glutenproject.execution.{FlushableHashAggregateExecTransformer, HashAggregateExecTransformer, ProjectExecTransformer, RegularHashAggregateExecTransformer}
 
 import org.apache.spark.sql.SparkSession
 import org.apache.spark.sql.catalyst.expressions.aggregate.{Partial, PartialMerge}
+import org.apache.spark.sql.catalyst.plans.physical.ClusteredDistribution
 import org.apache.spark.sql.catalyst.rules.Rule
 import org.apache.spark.sql.execution.SparkPlan
 import org.apache.spark.sql.execution.exchange.ShuffleExchangeLike
@@ -77,6 +78,9 @@ object HashAggPropagatedToShuffle {
     if (!agg.aggregateExpressions.forall(p => p.mode == Partial || p.mode == PartialMerge)) {
       return None
     }
+    if (FlushableHashAggregateRule.isAggInputAlreadyDistributedWithAggKeys(agg)) {
+      return None
+    }
     Some((proj, agg))
   }
 }
@@ -90,6 +94,35 @@ object HashAggWithShuffle {
     if (!agg.aggregateExpressions.forall(p => p.mode == Partial || p.mode == PartialMerge)) {
       return None
     }
+    if (FlushableHashAggregateRule.isAggInputAlreadyDistributedWithAggKeys(agg)) {
+      return None
+    }
     Some(agg)
   }
 }
+
+object FlushableHashAggregateRule {
+
+  /**
+   * Tells whether the input of `agg` is already clustered by the aggregation's grouping keys.
+   * When this returns true, the conversion of the aggregate into a flushable aggregation is
+   * usually skipped.
+   *
+   * Example: with an input hash-partitioned by (a, b) and an aggregate doing "group by a, b, c",
+   * every grouping set (a, b, c) is created on a single partition of the whole cluster, so the
+   * aggregate should NOT flush. Spark's planner may rely on this information to perform
+   * optimizations like doing "partial_count(a, b, c)" directly on the output data.
+   *
+   * An empty grouping set () is treated as satisfied by no partitioning pattern at all:
+   *   - (a, b) satisfies (a, b, c)
+   *   - (a, b) satisfies (a, b)
+   *   - (a, b) doesn't satisfy (a)
+   *   - (a, b) doesn't satisfy ()
+   */
+  def isAggInputAlreadyDistributedWithAggKeys(agg: HashAggregateExecTransformer): Boolean = {
+    val groupingKeys = agg.groupingExpressions
+    // `&&` short-circuits, so the distribution is only built for a non-empty grouping set.
+    groupingKeys.nonEmpty &&
+    agg.child.outputPartitioning.satisfies(ClusteredDistribution(groupingKeys))
+  }
+}
diff --git a/backends-velox/src/test/scala/io/glutenproject/execution/VeloxAggregateFunctionsSuite.scala b/backends-velox/src/test/scala/io/glutenproject/execution/VeloxAggregateFunctionsSuite.scala
@@ -731,9 +731,11 @@ class VeloxAggregateFunctionsFlushSuite extends VeloxAggregateFunctionsSuite {
       .set(GlutenConfig.ABANDON_PARTIAL_AGGREGATION_MIN_ROWS.key, "10")
   }
 
-  test("group sets with keys") {
-    withSQLConf(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false") {
-      runQueryAndCompare(VeloxAggregateFunctionsSuite.GROUP_SETS_TEST_SQL) {
+  test("flushable aggregate rule") {
+    withSQLConf(
+      SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false",
+      SQLConf.FILES_MAX_PARTITION_BYTES.key -> "1k") {
+      runQueryAndCompare("select distinct l_partkey from lineitem") {
         df =>
           val executedPlan = getExecutedPlan(df)
           assert(

diff --git a/gluten-core/src/main/scala/io/glutenproject/execution/HashJoinExecTransformer.scala b/gluten-core/src/main/scala/io/glutenproject/execution/HashJoinExecTransformer.scala
@@ -173,6 +173,7 @@ trait HashJoinLikeExecTransformer
       joinType match {
         case _: InnerLike | RightOuter => expandPartitioning(right.outputPartitioning)
         case LeftOuter => left.outputPartitioning
+        case FullOuter => UnknownPartitioning(left.outputPartitioning.numPartitions)
         case x =>
           throw new IllegalArgumentException(
             s"HashJoin should not take $x as the JoinType with building left side")
@@ -182,6 +183,7 @@ trait HashJoinLikeExecTransformer
         case _: InnerLike | LeftOuter | LeftSemi | LeftAnti | _: ExistenceJoin =>
           expandPartitioning(left.outputPartitioning)
         case RightOuter => right.outputPartitioning
+        case FullOuter => UnknownPartitioning(right.outputPartitioning.numPartitions)
         case x =>
           throw new IllegalArgumentException(
             s"HashJoin should not take $x as the JoinType with building right side")