[SPARK-31973][SQL] Skip partial aggregates if grouping keys have high cardinality #28804
Changes from 32 commits
@@ -2196,6 +2196,34 @@ object SQLConf {
    .checkValue(bit => bit >= 10 && bit <= 30, "The bit value must be in [10, 30].")
    .createWithDefault(16)

  val SKIP_PARTIAL_AGGREGATE_MINROWS =
    buildConf("spark.sql.aggregate.skipPartialAggregate.minNumRows")
      .internal()
      .doc("Number of records after which the aggregate operator checks whether " +
        "the partial aggregation phase can be avoided.")
      .longConf
      .createWithDefault(100000)

  val SKIP_PARTIAL_AGGREGATE_AGGREGATE_RATIO =
    buildConf("spark.sql.aggregate.skipPartialAggregate.aggregateRatio")
      .internal()
      .doc("Ratio of the number of records present in the map of the aggregate operator " +
        "to the total number of records processed by the aggregate operator.")
      .doubleConf
      .createWithDefault(0.5)
Review thread on the two parameters:

> Why do we need the two params for this optimization?

> Also, could you check performance numbers by varying the params?

> @maropu I have borrowed this heuristic from Hive. We can merge them into one. Any suggestions here?
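For readers following the thread, here is a minimal sketch of the Hive-style rule the two parameters encode, written as a standalone Scala function. The function name and standalone form are hypothetical (in the PR the check lives inside the aggregate operator), and the skip direction follows the author's reply further down: skip only when aggregation fails to reduce the data.

```scala
// Hypothetical standalone rendering of the heuristic under discussion.
// minNumRows ~ spark.sql.aggregate.skipPartialAggregate.minNumRows
// ratio      ~ spark.sql.aggregate.skipPartialAggregate.aggregateRatio
def shouldSkipPartialAggregate(
    rowsProcessed: Long,  // input rows seen so far by the partial aggregate
    groupsInMap: Long,    // distinct grouping keys currently in the hash map
    minNumRows: Long = 100000L,
    ratio: Double = 0.5): Boolean = {
  // Decide only after a large enough sample, and skip the partial phase when
  // the map retains more than `ratio` of the rows seen, i.e. the grouping
  // keys are so distinct that map-side aggregation barely reduces the data.
  rowsProcessed >= minNumRows && groupsInMap.toDouble / rowsProcessed > ratio
}
```

Merging the two knobs into one, as suggested above, would amount to hard-coding the sample size and exposing only the ratio.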
  val SKIP_PARTIAL_AGGREGATE_ENABLED =
    buildConf("spark.sql.aggregate.skipPartialAggregate")
      .internal()
      .doc("When enabled, the partial aggregation is skipped when the following " +
        "two conditions are met: 1. the total number of records processed is greater " +
        s"than the threshold defined by ${SKIP_PARTIAL_AGGREGATE_MINROWS.key}; 2. the ratio " +
        "of the record count in the map to the total record count is greater than the value " +
        s"defined by ${SKIP_PARTIAL_AGGREGATE_AGGREGATE_RATIO.key}.")
      .booleanConf
      .createWithDefault(true)
Review thread on using column statistics instead:

> Could we use a threshold + column stats instead of this boolean config?

> I didn't get the threshold part. Can you please elaborate?

> That meant a ratio of a distinct row count and total row count in the group-by key column stats. For example, if a number ...

> Thanks @maropu for explaining, I will make this change.

> @maropu This is a very useful suggestion. One issue is that column stats are rarely computed. We came across this work in Hive: https://issues.apache.org/jira/browse/HIVE-291. They turn off map-side aggregation (i.e., the partial aggregate becomes a pass-through) in the physical Group-By operator unless map-side aggregation reduces the entries by at least half, and they look at 100000 rows to decide that (ref: patch https://issues.apache.org/jira/secure/attachment/12400257/291.1.txt). Should we do something similar in HashAggregateExec here? Any thoughts on this?

> I think whether that approach improves performance depends on IO performance, but the idea looks interesting to me. WDYT? @cloud-fan
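On the stats-availability point: the distinct-to-total ratio described above only exists after statistics have been collected for the group-by columns. A sketch of what that looks like, assuming an active `spark` session; the table and column names are placeholders:

```scala
// Compute per-column statistics so the optimizer can read distinctCount;
// `sales` and `customer_id` are placeholder names for this sketch.
spark.sql("ANALYZE TABLE sales COMPUTE STATISTICS FOR COLUMNS customer_id")

// A distinct count close to the row count indicates a high-cardinality key,
// i.e. partial aggregation would barely reduce the data.
spark.sql("DESC EXTENDED sales customer_id").show(truncate = false)
```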
  val AVRO_COMPRESSION_CODEC = buildConf("spark.sql.avro.compression.codec")
    .doc("Compression codec used in writing of AVRO files. Supported codecs: " +
      "uncompressed, deflate, snappy, bzip2 and xz. Default codec is snappy.")
@@ -2922,6 +2950,12 @@ class SQLConf extends Serializable with Logging {

  def fastHashAggregateRowMaxCapacityBit: Int = getConf(FAST_HASH_AGGREGATE_MAX_ROWS_CAPACITY_BIT)

  def skipPartialAggregate: Boolean = getConf(SKIP_PARTIAL_AGGREGATE_ENABLED)

  def skipPartialAggregateThreshold: Long = getConf(SKIP_PARTIAL_AGGREGATE_MINROWS)

  def skipPartialAggregateRatio: Double = getConf(SKIP_PARTIAL_AGGREGATE_AGGREGATE_RATIO)

  def datetimeJava8ApiEnabled: Boolean = getConf(DATETIME_JAVA8API_ENABLED)

  def uiExplainMode: String = getConf(UI_EXPLAIN_MODE)
Review thread on data dependence:

> I think it must be data-dependent. If I have 10^10 records, and partial agg cuts it down to 10^8 (1% of the original inputs), it's still worth doing partial agg.

> @cloud-fan We skip partial aggregation only when the aggregation was not able to cut down records by 50% (defined by spark.sql.aggregate.partialaggregate.skip.ratio). In this case it will not kick in.
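Spelling out the arithmetic of that example: the map-to-input ratio is 10^8 / 10^10 = 0.01, far below the 0.5 default, so the skip condition never fires and partial aggregation proceeds as usual.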